diff --git a/.project b/.project new file mode 100644 index 0000000000000..e964c69ba36e8 --- /dev/null +++ b/.project @@ -0,0 +1,11 @@ + + + llvm-project + + + + + + + + diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 27649fd60da50..d4933452f61d8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -595,6 +595,9 @@ C++2b Feature Support CUDA/HIP Language Changes in Clang ---------------------------------- + - Allow the use of ``__noinline__`` as a keyword (instead of ``__attribute__((noinline))``) + in lambda declarations. + Objective-C Language Changes in Clang ------------------------------------- diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 1a4ec43705bc3..3fa641778d021 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -5723,7 +5723,7 @@ class OMPMapClause final : public OMPMappableExprListClause, size_t numTrailingObjects(OverloadToken) const { // There are varlist_size() of expressions, and varlist_size() of // user-defined mappers. - return 2 * varlist_size(); + return 2 * varlist_size() + 1; } size_t numTrailingObjects(OverloadToken) const { return getUniqueDeclarationsNum(); @@ -5737,7 +5737,7 @@ class OMPMapClause final : public OMPMappableExprListClause, OpenMPMapModifierKind MapTypeModifiers[NumberOfOMPMapClauseModifiers] = { OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, - OMPC_MAP_MODIFIER_unknown}; + OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown}; /// Location of map-type-modifiers for the 'map' clause. SourceLocation MapTypeModifiersLoc[NumberOfOMPMapClauseModifiers]; @@ -5838,6 +5838,11 @@ class OMPMapClause final : public OMPMappableExprListClause, /// Set colon location. void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; } + /// Set iterator modifier. + void setIteratorModifier(Expr *IteratorModifier) { + getTrailingObjects()[2 * varlist_size()] = IteratorModifier; + } + public: /// Creates clause with a list of variables \a VL. /// @@ -5850,6 +5855,7 @@ class OMPMapClause final : public OMPMappableExprListClause, /// \param ComponentLists Component lists used in the clause. /// \param UDMapperRefs References to user-defined mappers associated with /// expressions used in the clause. + /// \param IteratorModifier Iterator modifier. /// \param MapModifiers Map-type-modifiers. /// \param MapModifiersLoc Location of map-type-modifiers. /// \param UDMQualifierLoc C++ nested name specifier for the associated @@ -5862,7 +5868,7 @@ class OMPMapClause final : public OMPMappableExprListClause, Create(const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, MappableExprComponentListsRef ComponentLists, - ArrayRef UDMapperRefs, + ArrayRef UDMapperRefs, Expr *IteratorModifier, ArrayRef MapModifiers, ArrayRef MapModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId, @@ -5881,6 +5887,11 @@ class OMPMapClause final : public OMPMappableExprListClause, static OMPMapClause *CreateEmpty(const ASTContext &C, const OMPMappableExprListSizeTy &Sizes); + /// Fetches Expr * of iterator modifier. + Expr *getIteratorModifier() { + return getTrailingObjects()[2 * varlist_size()]; + } + /// Fetches mapping kind for the clause. 
OpenMPMapClauseKind getMapType() const LLVM_READONLY { return MapType; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 16cf932c3760b..eaf4a6db3600e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3749,8 +3749,8 @@ def OMPDeclareTargetDecl : InheritableAttr { let Documentation = [OMPDeclareTargetDocs]; let Args = [ EnumArgument<"MapType", "MapTypeTy", - [ "to", "link" ], - [ "MT_To", "MT_Link" ]>, + [ "to", "enter", "link" ], + [ "MT_To", "MT_Enter", "MT_Link" ]>, EnumArgument<"DevType", "DevTypeTy", [ "host", "nohost", "any" ], [ "DT_Host", "DT_NoHost", "DT_Any" ]>, diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 28da4ff72bc45..82fc6c047b5da 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1359,7 +1359,7 @@ def err_omp_unknown_map_type : Error< "incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'">; def err_omp_unknown_map_type_modifier : Error< "incorrect map type modifier, expected one of: 'always', 'close', 'mapper'" - "%select{|, 'present'}0%select{|, 'ompx_hold'}1">; + "%select{|, 'present'|, 'present', 'iterator'}0%select{|, 'ompx_hold'}1">; def err_omp_map_type_missing : Error< "missing map type">; def err_omp_map_type_modifier_missing : Error< @@ -1383,12 +1383,22 @@ def note_omp_assumption_clause_continue_here : Note<"the ignored tokens spans until here">; def err_omp_declare_target_unexpected_clause: Error< "unexpected '%0' clause, only %select{'device_type'|'to' or 'link'|'to', 'link' or 'device_type'|'device_type', 'indirect'|'to', 'link', 'device_type' or 'indirect'}1 clauses expected">; +def err_omp_declare_target_unexpected_clause_52: Error< + "unexpected '%0' clause, only %select{'device_type'|'enter' or 'link'|'enter', 'link' or 'device_type'|'device_type', 'indirect'|'enter', 'link', 'device_type' or 'indirect'}1 clauses expected">; def err_omp_begin_declare_target_unexpected_implicit_to_clause: Error< "unexpected '(', only 'to', 'link' or 'device_type' clauses expected for 'begin declare target' directive">; -def err_omp_declare_target_unexpected_clause_after_implicit_to: Error< +def err_omp_declare_target_wrong_clause_after_implicit_to: Error< "unexpected clause after an implicit 'to' clause">; +def err_omp_declare_target_wrong_clause_after_implicit_enter: Error< + "unexpected clause after an implicit 'enter' clause">; def err_omp_declare_target_missing_to_or_link_clause: Error< "expected at least one %select{'to' or 'link'|'to', 'link' or 'indirect'}0 clause">; +def err_omp_declare_target_missing_enter_or_link_clause: Error< + "expected at least one %select{'enter' or 'link'|'enter', 'link' or 'indirect'}0 clause">; +def err_omp_declare_target_unexpected_to_clause: Error< + "unexpected 'to' clause, use 'enter' instead">; +def err_omp_declare_target_unexpected_enter_clause: Error< + "unexpected 'enter' clause, use 'to' instead">; def err_omp_declare_target_multiple : Error< "%0 appears multiple times in clauses on the same declare target directive">; def err_omp_declare_target_indirect_device_type: Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a53a830ccc190..9a004945974ee 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10814,6 +10814,8 @@ def 
err_omp_depend_sink_source_with_modifier : Error< "depend modifier cannot be used with 'sink' or 'source' depend type">; def err_omp_depend_modifier_not_iterator : Error< "expected iterator specification as depend modifier">; +def err_omp_map_modifier_not_iterator : Error< + "expected iterator specification as map modifier">; def err_omp_linear_ordered : Error< "'linear' clause cannot be specified along with 'ordered' clause with a parameter">; def err_omp_unexpected_schedule_modifier : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 7d4b135c1de3b..1cd1df7e9b0de 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -253,8 +253,10 @@ LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.") LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") LANGOPT(OpenMPGPUThreadsPerTeam, 32, 256, "Number of threads per team for GPUs.") +LANGOPT(OpenMPTargetXteamReductionBlockSize, 32, 1024, "Number of threads in a block used by cross-team reduction.") LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPTargetIgnoreEnvVars , 1, 0, "Generate code assuming that device related environment variables can be ignored.") +LANGOPT(OpenMPTargetBigJumpLoop , 1, 0, "Use big jump loop code generation technique.") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.") LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.") diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index 4c0884e0a6424..26153853e09b3 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -131,6 +131,7 @@ OPENMP_MAP_KIND(release) OPENMP_MAP_MODIFIER_KIND(always) OPENMP_MAP_MODIFIER_KIND(close) OPENMP_MAP_MODIFIER_KIND(mapper) +OPENMP_MAP_MODIFIER_KIND(iterator) OPENMP_MAP_MODIFIER_KIND(present) // This is an OpenMP extension for the sake of OpenACC support. OPENMP_MAP_MODIFIER_KIND(ompx_hold) diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 82875aa0fafa6..7837c1b4e70c3 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -83,7 +83,7 @@ enum OpenMPMapModifierKind { OMPC_MAP_MODIFIER_last }; - /// Number of allowed map-type-modifiers. +/// Number of allowed map-type-modifiers. 
static constexpr unsigned NumberOfOMPMapClauseModifiers = OMPC_MAP_MODIFIER_last - OMPC_MAP_MODIFIER_unknown - 1; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 74d495c8dfeea..bb4374bfbdca1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2616,6 +2616,8 @@ def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_gpu_threads_per_team_EQ : Joined<["-"], "fopenmp-gpu-threads-per-team=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_target_xteam_reduction_blocksize_EQ : Joined<["-"], "fopenmp-target-xteam-reduction-blocksize=">, Group, + Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group, Flags<[CC1Option, NoArgumentUnused]>, HelpText<"Enable debugging in the OpenMP offloading device RTL">; def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group, Flags<[NoArgumentUnused]>; @@ -2630,6 +2632,14 @@ def fno_openmp_target_ignore_env_vars : Flag<["-"], "fno-openmp-target-ignore-en Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, HelpText<"Assert that device related environment variables cannot be ignored while generating code">, MarshallingInfoFlag>; +def fopenmp_target_big_jump_loop : Flag<["-"], "fopenmp-target-big-jump-loop">, Group, + Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, + HelpText<"Use the big-jump-loop code generation technique if possible">, + MarshallingInfoFlag>; +def fno_openmp_target_big_jump_loop : Flag<["-"], "fno-openmp-target-big-jump-loop">, Group, + Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, + HelpText<"Do not use the big-jump-loop code generation technique">, + MarshallingInfoFlag>; def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, @@ -3724,12 +3734,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", NegFlag>, Group; def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, - HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">, + HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">, Flags<[CC1Option]>, Values<"none,2,3,4,5">, NormalizedValuesScope<"TargetOptions">, NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>, - MarshallingInfoEnum, "COV_4">; + MarshallingInfoEnum, "COV_5">; defm code_object_v3_legacy : SimpleMFlag<"code-object-v3", "Legacy option to specify code object ABI V3", diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b32dfe158c8f3..ed3a8ebaea417 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11079,6 +11079,7 @@ class Sema final { QualType MapperType, SourceLocation StartLoc, DeclarationName VN); + void ActOnOpenMPIteratorVarDecl(VarDecl *VD); bool isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const; const ValueDecl *getOpenMPDeclareMapperVarName() const; @@ -11790,6 +11791,7 @@ class Sema final { /// Data used for processing a list of variables in OpenMP clauses. 
struct OpenMPVarListDataTy final { Expr *DepModOrTailExpr = nullptr; + Expr *IteratorExpr = nullptr; SourceLocation ColonLoc; SourceLocation RLoc; CXXScopeSpec ReductionOrMapperIdScopeSpec; @@ -11916,7 +11918,7 @@ class Sema final { SourceLocation EndLoc); /// Called on well-formed 'map' clause. OMPClause *ActOnOpenMPMapClause( - ArrayRef MapTypeModifiers, + Expr *IteratorModifier, ArrayRef MapTypeModifiers, ArrayRef MapTypeModifiersLoc, CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp index cecbd703ac61e..da842f6b190e7 100644 --- a/clang/lib/AST/AttrImpl.cpp +++ b/clang/lib/AST/AttrImpl.cpp @@ -137,7 +137,7 @@ void OMPDeclareTargetDeclAttr::printPrettyPragma( // Use fake syntax because it is for testing and debugging purpose only. if (getDevType() != DT_Any) OS << " device_type(" << ConvertDevTypeTyToStr(getDevType()) << ")"; - if (getMapType() != MT_To) + if (getMapType() != MT_To && getMapType() != MT_Enter) OS << ' ' << ConvertMapTypeTyToStr(getMapType()); if (Expr *E = getIndirectExpr()) { OS << " indirect("; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 34e75723b3f30..d4903352873e4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16053,9 +16053,13 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, if ((*I)->isValueDependent() || !EvaluateCallArg(PVD, *I, Call, Info) || Info.EvalStatus.HasSideEffects) { - // If evaluation fails, throw away the argument entirely. - if (APValue *Slot = Info.getParamSlot(Call, PVD)) - *Slot = APValue(); + // If evaluation fails, throw away the argument entirely unless I is + // value-dependent. In those cases, the condition above will short-circuit + // before calling `EvaluateCallArg` and no param slot is created. + if (!(*I)->isValueDependent()) { + if (APValue *Slot = Info.getParamSlot(Call, PVD)) + *Slot = APValue(); + } } // Ignore any side-effects from a failed evaluation. 
This is safe because diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index adfc1d542bb34..096b4e9f7f8aa 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1127,7 +1127,7 @@ OMPMapClause *OMPMapClause::Create( const ASTContext &C, const OMPVarListLocTy &Locs, ArrayRef Vars, ArrayRef Declarations, MappableExprComponentListsRef ComponentLists, ArrayRef UDMapperRefs, - ArrayRef MapModifiers, + Expr *IteratorModifier, ArrayRef MapModifiers, ArrayRef MapModifiersLoc, NestedNameSpecifierLoc UDMQualifierLoc, DeclarationNameInfo MapperId, OpenMPMapClauseKind Type, bool TypeIsImplicit, SourceLocation TypeLoc) { @@ -1150,7 +1150,7 @@ OMPMapClause *OMPMapClause::Create( void *Mem = C.Allocate( totalSizeToAlloc( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); OMPMapClause *Clause = new (Mem) @@ -1159,6 +1159,7 @@ OMPMapClause *OMPMapClause::Create( Clause->setVarRefs(Vars); Clause->setUDMapperRefs(UDMapperRefs); + Clause->setIteratorModifier(IteratorModifier); Clause->setClauseInfo(Declarations, ComponentLists); Clause->setMapType(Type); Clause->setMapLoc(TypeLoc); @@ -1171,10 +1172,12 @@ OMPMapClause::CreateEmpty(const ASTContext &C, void *Mem = C.Allocate( totalSizeToAlloc( - 2 * Sizes.NumVars, Sizes.NumUniqueDeclarations, + 2 * Sizes.NumVars + 1, Sizes.NumUniqueDeclarations, Sizes.NumUniqueDeclarations + Sizes.NumComponentLists, Sizes.NumComponents)); - return new (Mem) OMPMapClause(Sizes); + OMPMapClause *Clause = new (Mem) OMPMapClause(Sizes); + Clause->setIteratorModifier(nullptr); + return Clause; } OMPToClause *OMPToClause::Create( @@ -2216,16 +2219,27 @@ static void PrintMapper(raw_ostream &OS, T *Node, OS << Node->getMapperIdInfo() << ')'; } +template +static void PrintIterator(raw_ostream &OS, T *Node, + const PrintingPolicy &Policy) { + if (Expr *IteratorModifier = Node->getIteratorModifier()) + IteratorModifier->printPretty(OS, nullptr, Policy); +} + void OMPClausePrinter::VisitOMPMapClause(OMPMapClause *Node) { if (!Node->varlist_empty()) { OS << "map("; if (Node->getMapType() != OMPC_MAP_unknown) { for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) { if (Node->getMapTypeModifier(I) != OMPC_MAP_MODIFIER_unknown) { - OS << getOpenMPSimpleClauseTypeName(OMPC_map, - Node->getMapTypeModifier(I)); - if (Node->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_mapper) - PrintMapper(OS, Node, Policy); + if (Node->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_iterator) { + PrintIterator(OS, Node, Policy); + } else { + OS << getOpenMPSimpleClauseTypeName(OMPC_map, + Node->getMapTypeModifier(I)); + if (Node->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_mapper) + PrintMapper(OS, Node, Policy); + } OS << ','; } } diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index cf16c320580a9..ebad051cb0d3c 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -189,9 +189,7 @@ bool AMDGPUTargetInfo::initFeatureMap( case GK_GFX1101: case GK_GFX1100: Features["ci-insts"] = true; - Features["dot1-insts"] = true; Features["dot5-insts"] = true; - Features["dot6-insts"] = true; Features["dot7-insts"] = true; Features["dot8-insts"] = true; Features["dl-insts"] = true; diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 5e73a3cb8019a..c43d0ba5896f0 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ 
b/clang/lib/Basic/Targets/AMDGPU.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H +#include "clang/Basic/AddressSpaces.h" #include "clang/Basic/TargetID.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index be9497563621a..72a01fbd629cf 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2335,8 +2335,13 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, getLangOpts().Sanitize.has(SanitizerKind::Memory) || getLangOpts().Sanitize.has(SanitizerKind::Return); + // Enable noundef attribute based on codegen options and + // skip adding the attribute to HIP device functions. + bool EnableNoundefAttrs = CodeGenOpts.EnableNoundefAttrs && + !(getLangOpts().HIP && getLangOpts().CUDAIsDevice); + // Determine if the return type could be partially undef - if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) { + if (EnableNoundefAttrs && HasStrictReturn) { if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect && DetermineNoUndef(RetTy, getTypes(), DL, RetAI)) RetAttrs.addAttribute(llvm::Attribute::NoUndef); @@ -2470,8 +2475,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, } // Decide whether the argument we're handling could be partially undef - if (CodeGenOpts.EnableNoundefAttrs && - DetermineNoUndef(ParamType, getTypes(), DL, AI)) { + if (EnableNoundefAttrs && DetermineNoUndef(ParamType, getTypes(), DL, AI)) { Attrs.addAttribute(llvm::Attribute::NoUndef); } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 73d08d8c9e0c8..79a21b8ac499d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2495,14 +2495,16 @@ static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType T) { llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - // Return an invalid address if variable is MT_To and unified - // memory is not enabled. For all other cases: MT_Link and - // MT_To with unified memory, return a valid address. - if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To && + // Return an invalid address if variable is MT_To (or MT_Enter starting with + // OpenMP 5.2) and unified memory is not enabled. For all other cases: MT_Link + // and MT_To (or MT_Enter) with unified memory, return a valid address. 
+ if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 1abeedae4baf4..9051b1ee9852c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1665,7 +1665,8 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory))) { SmallString<64> PtrName; { @@ -1880,7 +1881,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); @@ -7715,7 +7717,8 @@ class MappableExprsHandler { if (llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { RequiresReference = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -10524,6 +10527,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( CGM, ParentName, cast(E)); break; + case OMPD_target_parallel_loop: + CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CGM, ParentName, cast(E)); + break; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: @@ -10676,7 +10683,8 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast(GD.getDecl())); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast(GD.getDecl())); return true; @@ -10713,7 +10721,8 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, int64_t VarSize; llvm::GlobalValue::LinkageTypes Linkage; - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !HasRequiresUnifiedSharedMemory) { Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; VarName = CGM.getMangledName(VD); @@ -10744,7 +10753,8 @@ void 
CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, } } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."); if (*Res == OMPDeclareTargetDeclAttr::MT_Link) @@ -10781,12 +10791,14 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."); (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 50dd74653e78b..d7cb2f32b4325 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -665,6 +665,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_teams: return hasNestedSPMDDirective(Ctx, D); case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -1040,59 +1041,6 @@ static int ComputeGenericWorkgroupSize(CodeGenModule &CGM, int WorkgroupSize) { return WorkgroupSizeWithMaster; } -int getWorkGroupSizeSPMDHelper(CodeGenModule &CGM, - const OMPExecutableDirective &D) { - // Honor block-size provided by command-line option. This logic must be kept - // in sync with metadata generation. If this option is not specified on the - // command line then the value used will be the 256. - int WorkGroupSz = CGM.getLangOpts().OpenMPGPUThreadsPerTeam; - - // Check block-size provided by thread_limit clause. We start with the - // maximum thread limit and lower it if user requests a lower thread limit. - int ThreadLimit = CGM.getTarget().getGridValue().GV_Max_WG_Size; - const auto *ThreadLimitClause = D.getSingleClause(); - if (ThreadLimitClause) { - Expr *ThreadLimitExpr = ThreadLimitClause->getThreadLimit(); - clang::Expr::EvalResult Result; - if (ThreadLimitExpr->EvaluateAsInt(Result, CGM.getContext())) { - int ThreadLimitEval = Result.Val.getInt().getExtValue(); - if (ThreadLimitEval > 0 && ThreadLimitEval < ThreadLimit) - ThreadLimit = ThreadLimitEval; - } - } - - // If the command line work group size is less than any default or user - // specified thread limit then it is honored otherwise the thread limit - // determined above will be used. - if (WorkGroupSz > ThreadLimit) - WorkGroupSz = ThreadLimit; - - // Set the actual number of threads if the user requests a value different - // then the default. If the value is greater than the currently computed - // thread limit then cap the number of threads to the thread limit. 
- int NumThreads = CGM.getTarget().getGridValue().GV_Default_WG_Size; - const auto *NumThreadsClause = D.getSingleClause(); - if (NumThreadsClause) { - Expr *NumThreadsExpr = NumThreadsClause->getNumThreads(); - clang::Expr::EvalResult Result; - if (NumThreadsExpr->EvaluateAsInt(Result, CGM.getContext())) { - NumThreads = Result.Val.getInt().getExtValue(); - // Cap the number of threads to the current thread limit. - if (NumThreads > ThreadLimit) - NumThreads = ThreadLimit; - // num_threads clause takes precendence over the command line value: - WorkGroupSz = NumThreads; - } - } - - // Sanitize the workgroup size received from the command line. Its default - // value is GV_Default_WG_Size. - if (WorkGroupSz < 1 || WorkGroupSz > ThreadLimit) - WorkGroupSz = CGM.getTarget().getGridValue().GV_Default_WG_Size; - - return WorkGroupSz; -} - void CGOpenMPRuntimeGPU::GenerateMetaData(CodeGenModule &CGM, const OMPExecutableDirective &D, llvm::Function *&OutlinedFn, @@ -1109,13 +1057,11 @@ void CGOpenMPRuntimeGPU::GenerateMetaData(CodeGenModule &CGM, isOpenMPParallelDirective(D.getDirectiveKind()) || CGM.isXteamRedKernel(CGM.getSingleForStmt(D.getAssociatedStmt()))) { // Call the work group size calculation for SPMD mode loops. - compileTimeThreadLimit = getWorkGroupSizeSPMDHelper(CGM, D); + compileTimeThreadLimit = CGM.getWorkGroupSizeSPMDHelper(D); - // Xteam reduction overrides the command-line option and other settings - // for now: blocksize hardcoded to 1024. - // TODO: remove this restriction. - if (CGM.isXteamRedKernel(CGM.getSingleForStmt(D.getAssociatedStmt()))) - compileTimeThreadLimit = 1024; + // Apply Xteam reduction constraints on blocksize. + if (CGM.isXteamRedKernel(D)) + compileTimeThreadLimit = CGM.getXteamRedBlockSize(D); // Add kernel metadata if ThreadLimit Clause is compile time constant > 0 if (compileTimeThreadLimit > 0) { @@ -1282,6 +1228,22 @@ void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel")); } +static OMPTgtExecModeFlags +computeExecutionMode(bool Mode, const Stmt *DirectiveStmt, CodeGenModule &CGM) { + if (!Mode) + return OMP_TGT_EXEC_MODE_GENERIC; + if (DirectiveStmt) { + if (CGM.isNoLoopKernel(DirectiveStmt)) + return OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; + if (CGM.isBigJumpLoopKernel(CGM.getSingleForStmt(DirectiveStmt))) + return OMP_TGT_EXEC_MODE_SPMD_BIG_JUMP_LOOP; + const Stmt *S = CGM.getSingleForStmt(DirectiveStmt); + if (S && CGM.isXteamRedKernel(S)) + return OMP_TGT_EXEC_MODE_XTEAM_RED; + } + return OMP_TGT_EXEC_MODE_SPMD; +} + void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -1313,29 +1275,32 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( } emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - } else + } else { emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + DEBUG_WITH_TYPE( + NO_LOOP_XTEAM_RED, + CGM.emitNxResult("[No-Loop/Xteam]", D, CodeGenModule::NxNonSPMD)); + } - setPropertyExecutionMode( - CGM, OutlinedFn->getName(), - Mode ? (DirectiveStmt && CGM.isNoLoopKernel(DirectiveStmt) - ? OMP_TGT_EXEC_MODE_SPMD_NO_LOOP - : (DirectiveStmt && CGM.isXteamRedKernel( - CGM.getSingleForStmt(DirectiveStmt)) - ? 
OMP_TGT_EXEC_MODE_XTEAM_RED - : OMP_TGT_EXEC_MODE_SPMD)) - : OMP_TGT_EXEC_MODE_GENERIC); - // Reset no-loop or xteam reduction kernel metadata if it exists - if (Mode && DirectiveStmt && CGM.isNoLoopKernel(DirectiveStmt)) - CGM.resetNoLoopKernel(DirectiveStmt); - else if (Mode && DirectiveStmt && - CGM.isXteamRedKernel(CGM.getSingleForStmt(DirectiveStmt))) - CGM.resetXteamRedKernel(CGM.getSingleForStmt(DirectiveStmt)); + setPropertyExecutionMode(CGM, OutlinedFn->getName(), + computeExecutionMode(Mode, DirectiveStmt, CGM)); + + // Reset specialized kernel metadata if it exists + if (Mode && DirectiveStmt) { + if (CGM.isNoLoopKernel(DirectiveStmt)) + CGM.resetNoLoopKernel(DirectiveStmt); + else if (CGM.isBigJumpLoopKernel(CGM.getSingleForStmt(DirectiveStmt))) + CGM.resetBigJumpLoopKernel(CGM.getSingleForStmt(DirectiveStmt)); + else if (CGM.isXteamRedKernel(CGM.getSingleForStmt(DirectiveStmt))) + CGM.resetXteamRedKernel(CGM.getSingleForStmt(DirectiveStmt)); + } // Reset cached mode CGM.setIsSPMDExecutionMode(false); assert(!CGM.isNoLoopKernel(DirectiveStmt) && "No-loop attribute not reset after emit"); + assert(!CGM.isBigJumpLoopKernel(CGM.getSingleForStmt(DirectiveStmt)) && + "Big jump loop attribute not reset after emit"); assert(!CGM.isXteamRedKernel(CGM.getSingleForStmt(DirectiveStmt)) && "Xteam reduction attribute not reset after emit"); } @@ -4237,13 +4202,12 @@ CGOpenMPRuntimeGPU::getGPUCompleteBlockSize(CodeGenFunction &CGF, // Get effects of thread-controlling clauses on the current number of threads // and any command line requests: - return llvm::ConstantInt::get(CGF.Int32Ty, - getWorkGroupSizeSPMDHelper(CGM, D)); + return llvm::ConstantInt::get(CGF.Int32Ty, CGM.getWorkGroupSizeSPMDHelper(D)); } -llvm::Value *CGOpenMPRuntimeGPU::getXteamRedBlockSize(CodeGenFunction &CGF) { - // For now, this is hardcoded to 1024 - return llvm::ConstantInt::get(CGF.Int32Ty, 1024); +llvm::Value *CGOpenMPRuntimeGPU::getXteamRedBlockSize(CodeGenFunction &CGF, + int BlockSize) { + return llvm::ConstantInt::get(CGF.Int32Ty, BlockSize); } llvm::Value *CGOpenMPRuntimeGPU::getGPUNumBlocks(CodeGenFunction &CGF) { @@ -4306,7 +4270,7 @@ CGOpenMPRuntimeGPU::getXteamRedFunctionPtrs(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntimeGPU::getXteamRedSum( CodeGenFunction &CGF, llvm::Value *Val, llvm::Value *SumPtr, llvm::Value *DTeamVals, llvm::Value *DTeamsDonePtr, - llvm::Value *ThreadStartIndex, llvm::Value *NumTeams) { + llvm::Value *ThreadStartIndex, llvm::Value *NumTeams, int BlockSize) { // TODO handle more types llvm::Type *SumType = Val->getType(); assert( @@ -4332,29 +4296,121 @@ llvm::Value *CGOpenMPRuntimeGPU::getXteamRedSum( if (SumType->isIntegerTy()) { if (SumType->getPrimitiveSizeInBits() == 32) { + switch (BlockSize) { + case 64: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ui_1x64), + Args); + case 128: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ui_2x64), + Args); + case 256: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ui_4x64), + Args); + case 512: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ui_8x64), + Args); + case 1024: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_xteamr_ui_16x64), + Args); + } + } + if (SumType->getPrimitiveSizeInBits() == 64) { + switch (BlockSize) { + case 
64: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ul_1x64), + Args); + case 128: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ul_2x64), + Args); + case 256: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ul_4x64), + Args); + case 512: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_ul_8x64), + Args); + case 1024: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_xteamr_ul_16x64), + Args); + } + } + } + if (SumType->isFloatTy()) { + switch (BlockSize) { + case 64: return CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_xteamr_ui_16x64), + OMPRTL___kmpc_xteamr_f_1x64), Args); - } - if (SumType->getPrimitiveSizeInBits() == 64) { + case 128: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_f_2x64), + Args); + case 256: return CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_xteamr_ul_16x64), + OMPRTL___kmpc_xteamr_f_4x64), + Args); + case 512: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_f_8x64), + Args); + case 1024: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_f_16x64), Args); } } - if (SumType->isFloatTy()) { - return CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_xteamr_f_16x64), - Args); - } if (SumType->isDoubleTy()) { - return CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_xteamr_d_16x64), - Args); + switch (BlockSize) { + case 64: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_d_1x64), + Args); + case 128: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_d_2x64), + Args); + case 256: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_d_4x64), + Args); + case 512: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_d_8x64), + Args); + case 1024: + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_xteamr_d_16x64), + Args); + } } llvm_unreachable("No support for other types currently."); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 00b2fb5b5b4d5..d2bde52682550 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -199,7 +199,7 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { llvm::Value *getGPUNumBlocks(CodeGenFunction &CGF); /// Get the number of blocks on the GPU for special reduction - llvm::Value *getXteamRedBlockSize(CodeGenFunction &CGF); + llvm::Value *getXteamRedBlockSize(CodeGenFunction &CGF, int BlockSize); std::pair getXteamRedFunctionPtrs(CodeGenFunction &CGF, llvm::Type *RedVarType); @@ -209,7 +209,7 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { llvm::Value *SumPtr, llvm::Value *DTeamVals, llvm::Value *DTeamsDonePtr, llvm::Value *ThreadStartIndex, - llvm::Value *NumTeams); + llvm::Value *NumTeams, int 
BlockSize); /// Returns whether the current architecture supports fast FP atomics bool supportFastFPAtomics() override; diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index d2947e887f58a..d0399e3e7f22e 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -69,9 +69,10 @@ llvm::Value *CodeGenFunction::applyNoLoopInc(const Expr *Inc, } std::pair -CodeGenFunction::EmitXteamRedStartingIndex(const ForStmt &FStmt) { +CodeGenFunction::EmitBigJumpLoopStartingIndex(const ForStmt &FStmt) { const CodeGenModule::NoLoopIntermediateStmts &Directives = - CGM.getXteamRedStmts(&FStmt); + CGM.isXteamRedKernel(&FStmt) ? CGM.getXteamRedStmts(&FStmt) + : CGM.getBigJumpLoopStmts(&FStmt); assert(Directives.size() > 0 && isa(Directives.back()) && "Appropriate directive not found"); const OMPLoopDirective &LD = *(cast(Directives.back())); @@ -86,7 +87,10 @@ CodeGenFunction::EmitXteamRedStartingIndex(const ForStmt &FStmt) { llvm::Value *GpuThreadId = RT.getGPUThreadID(*this); // workgroup_size - llvm::Value *WorkGroupSize = RT.getXteamRedBlockSize(*this); + llvm::Value *WorkGroupSize = + CGM.isXteamRedKernel(&FStmt) + ? RT.getXteamRedBlockSize(*this, CGM.getXteamRedBlockSize(&FStmt)) + : RT.getXteamRedBlockSize(*this, CGM.getBigJumpLoopBlockSize(&FStmt)); // workgroup_id llvm::Value *WorkGroupId = RT.getGPUBlockID(*this); @@ -106,20 +110,22 @@ CodeGenFunction::EmitXteamRedStartingIndex(const ForStmt &FStmt) { Builder.CreateIntCast(GlobalGpuThreadId, IvAddr.getElementType(), false); llvm::Value *Iv = Builder.CreateAdd(Gtid, Builder.CreateLoad(IvAddr)); - // Cache the thread specific initial loop iteration value and the number of - // teams - CGM.updateXteamRedKernel(&FStmt, Builder.CreateIntCast(Iv, Int64Ty, false), - RT.getGPUNumBlocks(*this)); - + if (CGM.isXteamRedKernel(&FStmt)) { + // Cache the thread specific initial loop iteration value and the number of + // teams + CGM.updateXteamRedKernel(&FStmt, Builder.CreateIntCast(Iv, Int64Ty, false), + RT.getGPUNumBlocks(*this)); + } // Set the initial value of the loop iteration Builder.CreateStore(Iv, IvAddr); return std::make_pair(LoopVD, IvAddr); } -void CodeGenFunction::EmitXteamRedUpdates(const ForStmt &FStmt) { +void CodeGenFunction::EmitBigJumpLoopUpdates(const ForStmt &FStmt) { const CodeGenModule::NoLoopIntermediateStmts &Directives = - CGM.getXteamRedStmts(&FStmt); + CGM.isXteamRedKernel(&FStmt) ? CGM.getXteamRedStmts(&FStmt) + : CGM.getBigJumpLoopStmts(&FStmt); assert(Directives.size() > 0 && isa(Directives.back()) && "Appropriate directive not found"); const OMPLoopDirective &LD = *(cast(Directives.back())); @@ -128,18 +134,25 @@ void CodeGenFunction::EmitXteamRedUpdates(const ForStmt &FStmt) { EmitIgnoredExpr(UE); } -void CodeGenFunction::EmitXteamRedInc(const ForStmt &FStmt, - const VarDecl *LoopVD, - const Address &NoLoopIvAddr) { +void CodeGenFunction::EmitBigJumpLoopInc(const ForStmt &FStmt, + const VarDecl *LoopVD, + const Address &NoLoopIvAddr) { const CodeGenModule::NoLoopIntermediateStmts &Directives = - CGM.getXteamRedStmts(&FStmt); + CGM.isXteamRedKernel(&FStmt) ? 
CGM.getXteamRedStmts(&FStmt) + : CGM.getBigJumpLoopStmts(&FStmt); assert(Directives.size() > 0 && isa(Directives.back()) && "Appropriate directive not found"); const OMPLoopDirective &LD = *(cast(Directives.back())); auto &RT = static_cast(CGM.getOpenMPRuntime()); - llvm::Value *BlockSize = RT.getXteamRedBlockSize(*this); - llvm::Value *NumBlocks = CGM.getXteamRedNumTeams(&FStmt); + llvm::Value *BlockSize = + CGM.isXteamRedKernel(&FStmt) + ? RT.getXteamRedBlockSize(*this, CGM.getXteamRedBlockSize(&FStmt)) + : RT.getXteamRedBlockSize(*this, CGM.getBigJumpLoopBlockSize(&FStmt)); + + llvm::Value *NumBlocks = CGM.isXteamRedKernel(&FStmt) + ? CGM.getXteamRedNumTeams(&FStmt) + : RT.getGPUNumBlocks(*this); assert(NumBlocks && "Number of blocks cannot be null"); // prod = block_size * num_blocks llvm::Value *Prod = Builder.CreateMul(BlockSize, NumBlocks); @@ -195,7 +208,6 @@ CodeGenFunction::EmitNoLoopIV(const OMPLoopDirective &LD) { // Emit init of the iteration variable EmitIgnoredExpr(LD.getInit()); - return std::make_pair(IVDecl, GetAddrOfLocalVar(IVDecl)); } @@ -290,6 +302,19 @@ void CodeGenFunction::EmitNoLoopKernel(const OMPExecutableDirective &D, } } +void CodeGenFunction::EmitBigJumpLoopKernel(const OMPExecutableDirective &D, + SourceLocation Loc) { + if (!HaveInsertPoint()) + EnsureInsertPoint(); + + // We expect one FOR stmt for the OpenMP directive + const ForStmt *CapturedForStmt = CGM.getSingleForStmt(D.getAssociatedStmt()); + assert(CapturedForStmt && "Cannot generate kernel for null captured stmt"); + + // The BigJump loop will be generated during the following statement emit. + EmitStmt(CapturedForStmt); +} + void CodeGenFunction::EmitXteamRedKernel( const OMPExecutableDirective &D, const Stmt *S, const FunctionArgList &Args, const CodeGenModule::NoLoopIntermediateStmts &IntermediateStmts, @@ -315,7 +340,7 @@ void CodeGenFunction::EmitXteamRedKernel( EmitStmt(CapturedForStmt); // Now emit the calls to xteam_sum, one for each reduction variable - EmitXteamRedSum(CapturedForStmt, Args); + EmitXteamRedSum(CapturedForStmt, Args, CGM.getXteamRedBlockSize(D)); // Xteam codegen done CGM.setCurrentXteamRedStmt(nullptr); @@ -355,7 +380,8 @@ void CodeGenFunction::EmitXteamLocalAggregator(const ForStmt *FStmt) { // Emit __kmpc_xteam_sum(*xteam_red_local_addr, red_var_addr) for each reduction // in the helper map for the given For Stmt void CodeGenFunction::EmitXteamRedSum(const ForStmt *FStmt, - const FunctionArgList &Args) { + const FunctionArgList &Args, + int BlockSize) { auto &RT = static_cast(CGM.getOpenMPRuntime()); const CodeGenModule::XteamRedVarMap &RedVarMap = CGM.getXteamRedVarMap(FStmt); llvm::Value *ThreadStartIdx = CGM.getXteamRedThreadStartIndex(FStmt); @@ -379,7 +405,7 @@ void CodeGenFunction::EmitXteamRedSum(const ForStmt *FStmt, // Pass in OrigRedVarAddr.getPointer to kmpc_xteam_sum RT.getXteamRedSum(*this, Builder.CreateLoad(RVI.RedVarAddr), OrigRedVarAddr.getPointer(), DTeamVals, DTeamsDonePtr, - ThreadStartIdx, NumTeams); + ThreadStartIdx, NumTeams, BlockSize); } } @@ -832,8 +858,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { EmitOMPGenericLoopDirective(cast(*S)); break; case Stmt::OMPTeamsGenericLoopDirectiveClass: - llvm_unreachable("teams loop directive not supported yet."); - // EmitOMPTeamsGenericLoopDirective(cast(*S)); + EmitOMPTeamsGenericLoopDirective(cast(*S)); break; case Stmt::OMPTargetTeamsGenericLoopDirectiveClass: EmitOMPTargetTeamsGenericLoopDirective( @@ -844,9 +869,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) 
{ cast(*S)); break; case Stmt::OMPTargetParallelGenericLoopDirectiveClass: - llvm_unreachable("target parallel loop directive not supported yet."); - // EmitOMPTargetParallelGenericLoopDirective( - // cast(*S)); + EmitOMPTargetParallelGenericLoopDirective( + cast(*S)); break; case Stmt::OMPParallelMaskedDirectiveClass: llvm_unreachable("parallel masked directive not supported yet."); @@ -1454,27 +1478,29 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); - Address XteamRedIvAddr = Address::invalid(); + Address BigJumpLoopIvAddr = Address::invalid(); const VarDecl *LoopVar = nullptr; - const OMPLoopDirective *XteamLD = nullptr; - if (CGM.getLangOpts().OpenMPIsDevice && CGM.isXteamRedKernel(&S)) { + const OMPLoopDirective *BigJumpLoopLD = nullptr; + if (CGM.getLangOpts().OpenMPIsDevice && + (CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) { const CodeGenModule::NoLoopIntermediateStmts &Directives = - CGM.getXteamRedStmts(&S); + CGM.isXteamRedKernel(&S) ? CGM.getXteamRedStmts(&S) + : CGM.getBigJumpLoopStmts(&S); assert(Directives.size() > 0 && isa(Directives.back()) && "Appropriate directive not found"); - XteamLD = cast(Directives.back()); + BigJumpLoopLD = cast(Directives.back()); std::pair LoopVarInfo = - EmitXteamRedStartingIndex(S); + EmitBigJumpLoopStartingIndex(S); LoopVar = LoopVarInfo.first; - XteamRedIvAddr = LoopVarInfo.second; + BigJumpLoopIvAddr = LoopVarInfo.second; } else { // Evaluate the first part before the loop. if (S.getInit()) EmitStmt(S.getInit()); } - const Expr *CondExpr = XteamLD ? XteamLD->getCond() : S.getCond(); + const Expr *CondExpr = BigJumpLoopLD ? BigJumpLoopLD->getCond() : S.getCond(); // Start the loop with a block that tests the condition. // If there's an increment, the continue scope will be overwritten @@ -1559,18 +1585,21 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, // a compound statement. RunCleanupsScope BodyScope(*this); - if (CGM.getLangOpts().OpenMPIsDevice && CGM.isXteamRedKernel(&S)) { - EmitXteamRedUpdates(S); - EmitOMPNoLoopBody(*XteamLD); + if (CGM.getLangOpts().OpenMPIsDevice && + (CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) { + EmitBigJumpLoopUpdates(S); + EmitOMPNoLoopBody(*BigJumpLoopLD); } else { EmitStmt(S.getBody()); } } - if (CGM.getLangOpts().OpenMPIsDevice && CGM.isXteamRedKernel(&S)) { + if (CGM.getLangOpts().OpenMPIsDevice && + (CGM.isXteamRedKernel(&S) || CGM.isBigJumpLoopKernel(&S))) { EmitBlock(Continue.getBlock()); - EmitXteamRedInc(S, LoopVar, - XteamRedIvAddr); // *iv = *iv + num_teams * num_threads + EmitBigJumpLoopInc( + S, LoopVar, + BigJumpLoopIvAddr); // *iv = *iv + num_teams * num_threads } else { // If there is an increment, emit it next. 
if (S.getInc()) { diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c7076d35bbb7f..e1c93bf0a0d62 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -848,12 +848,15 @@ llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction( // Generate specialized kernels for device only if (CGM.getLangOpts().OpenMPIsDevice && D.hasAssociatedStmt() && - CGM.isNoLoopKernel(D.getAssociatedStmt())) { + (CGM.isNoLoopKernel(D.getAssociatedStmt()) || + (FStmt && CGM.isBigJumpLoopKernel(FStmt)))) { OMPPrivateScope PrivateScope(*this); EmitOMPPrivateClause(D, PrivateScope); (void)PrivateScope.Privatize(); - - EmitNoLoopKernel(D, Loc); + if (CGM.isNoLoopKernel(D.getAssociatedStmt())) + EmitNoLoopKernel(D, Loc); + else + EmitBigJumpLoopKernel(D, Loc); } else if (CGM.getLangOpts().OpenMPIsDevice && isXteamKernel) { OMPPrivateScope PrivateScope(*this); EmitOMPPrivateClause(D, PrivateScope); @@ -8176,12 +8179,14 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( /// A 'loop' construct is supposed to be a work distribution construct by /// default unless its binding region is the innermost enclosing parallel -/// region. For now, we are defaulting to work sharing as an experiment to -/// determine how best to implement 'loop' and its combined forms especially -/// as part of the 'target teams loop' directive). Note that this code is -/// equivalent to how 'for' is implemented (when not using OpenMPIRBuilder). +/// region, in which case it is a worksharing region. Because we currently +/// have no way to know if this is true, for now emit them as inlined loops. void CodeGenFunction::EmitOMPGenericLoopDirective( const OMPLoopDirective &S) { +#if 0 + // TODO: A 'loop' construct is worksharing only if its binding region is + // the innermost enclosing parallel region. Until we can determine + // this, 'loop' should be emitted as inlined. bool HasLastprivates = false; auto &&CodeGen = [this, &S, &HasLastprivates] (CodeGenFunction &CGF, PrePostActionTy &) { @@ -8199,6 +8204,14 @@ void CodeGenFunction::EmitOMPGenericLoopDirective( CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_loop); // Check for outer lastprivate conditional update. checkForLastprivateConditionalUpdate(*this, S); +#else + // Just inline the underlying statement for now. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + CGF.EmitStmt(cast(S.getAssociatedStmt())->getCapturedStmt()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); +#endif } /// Equivalent to 'parallel for' except for handling of clauses that don't @@ -8225,24 +8238,14 @@ void CodeGenFunction::EmitOMPParallelGenericLoopDirective( /// Emit code for 'teams loop' void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( const OMPTeamsGenericLoopDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { - Action.Enter(CGF); - // FIXME: Should be able to emit with generic loop code, but it doesn't - // work right now. - CGF.EmitOMPGenericLoopDirective(S); - }; - emitCommonOMPTeamsDirective(*this, S, OMPD_loop, CodeGen); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); -} - -/// Emit code for 'target parallel loop' -void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( - const OMPTargetParallelGenericLoopDirective &S) { + // For now, emit as the two combined directives 'parallel' and 'loop'. 
+ // This is similar to what we do for 'target teams loop'. Eventually, + // 'distribute' will be added so that 'teams loop' fully emulates + // 'teams distribute parallel for'. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { CGF.EmitOMPParallelGenericLoopDirective(S); }; - emitCommonOMPTargetDirective(*this, S, CodeGen); + emitCommonOMPTeamsDirective(*this, S, OMPD_loop, CodeGen); } static void emitTargetTeamsGenericLoopRegion( @@ -8283,6 +8286,47 @@ void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( "Target device function emission failed for 'target teams loop'."); } +static void emitTargetParallelGenericLoopRegion( + CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with (worksharing) 'loop' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_loop, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S) { + // Emit target parallel loop region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +/// Emit code for 'target parallel loop' +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { if (const auto *SD = dyn_cast(&D)) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 2af2c924371a5..6cba6927ef4a5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -404,6 +404,7 @@ class CodeGenFunction : public CodeGenTypeCache { return PostAllocaInsertPt; } + /// API for captured statement code generation. class CGCapturedStmtInfo { public: @@ -3240,6 +3241,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// conditions for a no-loop kernel are met. void EmitNoLoopKernel(const OMPExecutableDirective &D, SourceLocation Loc); + void EmitBigJumpLoopKernel(const OMPExecutableDirective &D, + SourceLocation Loc); + /// EmitXteamRedKernel - For an OpenMP target reduction directive, emit the /// kernel code assuming that related runtime environment variables can be /// ignored. @@ -3255,7 +3259,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// associated variables. Returns the loop iteration variable and its address. 
std::pair EmitNoLoopIV(const OMPLoopDirective &LD); - void EmitXteamRedUpdates(const ForStmt &FStmt); + /// Emit updates of the original loop indices. Used by both + /// BigJumpLoop and Xteam reduction kernel codegen. + void EmitBigJumpLoopUpdates(const ForStmt &FStmt); /// EmitSimpleStmt - Try to emit a "simple" statement which does not /// necessarily require an insertion point or debug information; typically @@ -3681,6 +3687,11 @@ class CodeGenFunction : public CodeGenTypeCache { static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S); + /// Emit device code for the target parallel loop directive. + static void EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S); + /// Emit the Stmt \p S and return its topmost canonical loop, if any. /// TODO: The \p Depth paramter is not yet implemented and must be 1. In the /// future it is meant to be the number of loops expected in the loop nests @@ -4901,12 +4912,17 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *applyNoLoopInc(const Expr *Inc, const VarDecl *IVDecl, llvm::Value *CurrVal); + /// Emit the starting index of a BigJumpLoop which is used in + /// BigJumpLoop and Xteam reduction kernels. std::pair - EmitXteamRedStartingIndex(const ForStmt &FStmt); - void EmitXteamRedInc(const ForStmt &FStmt, const VarDecl *LoopVar, - const Address &NoLoopIvAddr); + EmitBigJumpLoopStartingIndex(const ForStmt &FStmt); + /// Emit the increment of a BigJumpLoop which is used in BigJumpLoop + /// and Xteam reduction kernels. + void EmitBigJumpLoopInc(const ForStmt &FStmt, const VarDecl *LoopVar, + const Address &NoLoopIvAddr); void EmitXteamLocalAggregator(const ForStmt *FStmt); - void EmitXteamRedSum(const ForStmt *FStmt, const FunctionArgList &Args); + void EmitXteamRedSum(const ForStmt *FStmt, const FunctionArgList &Args, + int BlockSize); bool EmitXteamRedStmt(const Stmt *S); }; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a2e8078b93ec3..b4e66ad64a9a9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -584,6 +584,9 @@ void CodeGenModule::Release() { "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr, llvm::GlobalVariable::NotThreadLocal); addCompilerUsedGlobal(Var); + if (!getModule().getModuleFlag("amdgpu_hostcall")) { + getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); + } } // Emit amdgpu_code_object_version module flag, which is code object version // times 100. 
@@ -3314,12 +3317,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -7250,6 +7255,9 @@ void CodeGenModule::emitNxResult(std::string StatusMsg, switch (Status) { case NxSuccess: break; + case NxNonSPMD: + StatusMsg += "Non-SPMD mode not supported"; + break; case NxOptionDisabled: StatusMsg += "Command line option disabled"; break; @@ -7304,6 +7312,9 @@ void CodeGenModule::emitNxResult(std::string StatusMsg, case NxNonUnitStaticChunk: StatusMsg += "Schedule clause with non-unit chunk size"; break; + case NxNonConcurrentOrder: + StatusMsg += "Non-concurrent order not supported"; + break; case NxUnsupportedRedType: StatusMsg += "Unsupported reduction variable type"; break; @@ -7329,6 +7340,9 @@ void CodeGenModule::emitNxResult(std::string StatusMsg, case NxUnsupportedRedExpr: StatusMsg += "Unsupported reduction expression found"; break; + case NxUnsupportedXteamRedThreadLimit: + StatusMsg += "Thread Limit less than 256 not supported"; + break; } SourceLocation L = D.getBeginLoc(); @@ -7556,6 +7570,77 @@ CodeGenModule::getNoLoopForStmtStatus(const OMPExecutableDirective &D, return NxSuccess; } +int CodeGenModule::getWorkGroupSizeSPMDHelper(const OMPExecutableDirective &D) { + // Honor block-size provided by command-line option. This logic must be kept + // in sync with metadata generation. If this option is not specified on the + // command line then the value used will be 256. + int WorkGroupSz = getLangOpts().OpenMPGPUThreadsPerTeam; + + // Cross team reduction blocksize default may be specified separately. + if (isXteamRedKernel(D)) + WorkGroupSz = getLangOpts().OpenMPTargetXteamReductionBlockSize; + + // Check block-size provided by thread_limit clause. We start with the + // maximum thread limit and lower it if the user requests a lower thread limit. + int ThreadLimit = getTarget().getGridValue().GV_Max_WG_Size; + const auto *ThreadLimitClause = D.getSingleClause(); + if (ThreadLimitClause) { + Expr *ThreadLimitExpr = ThreadLimitClause->getThreadLimit(); + clang::Expr::EvalResult Result; + if (ThreadLimitExpr->EvaluateAsInt(Result, getContext())) { + int ThreadLimitEval = Result.Val.getInt().getExtValue(); + if (ThreadLimitEval > 0 && ThreadLimitEval < ThreadLimit) + ThreadLimit = ThreadLimitEval; + } + } + + // If the command line work group size is less than any default or user + // specified thread limit then it is honored; otherwise the thread limit + // determined above will be used. + if (WorkGroupSz > ThreadLimit) + WorkGroupSz = ThreadLimit; + + // Set the actual number of threads if the user requests a value different + // from the default. If the value is greater than the currently computed + // thread limit then cap the number of threads to the thread limit.
+ int NumThreads = getTarget().getGridValue().GV_Default_WG_Size; + const auto *NumThreadsClause = D.getSingleClause(); + if (NumThreadsClause) { + Expr *NumThreadsExpr = NumThreadsClause->getNumThreads(); + clang::Expr::EvalResult Result; + if (NumThreadsExpr->EvaluateAsInt(Result, getContext())) { + NumThreads = Result.Val.getInt().getExtValue(); + // Cap the number of threads to the current thread limit. + if (NumThreads > ThreadLimit) + NumThreads = ThreadLimit; + // num_threads clause takes precendence over the command line value: + WorkGroupSz = NumThreads; + } + } + + // Sanitize the workgroup size received from the command line. Its default + // value is GV_Default_WG_Size. + if (WorkGroupSz < 1 || WorkGroupSz > ThreadLimit) + WorkGroupSz = getTarget().getGridValue().GV_Default_WG_Size; + + return WorkGroupSz; +} + +int CodeGenModule::computeXteamRedBlockSize(const OMPExecutableDirective &D) { + int InitialBlockSize = getWorkGroupSizeSPMDHelper(D); + // We support block sizes 64, 128, 256, 512, and 1024 only for Xteam + // reduction. + if (InitialBlockSize < 128) + return 64; + if (InitialBlockSize < 256) + return 128; + if (InitialBlockSize < 512) + return 256; + if (InitialBlockSize < 1024) + return 512; + return 1024; +} + CodeGenModule::NoLoopXteamErr CodeGenModule::getXteamRedForStmtStatus(const OMPExecutableDirective &D, const Stmt *OMPStmt, @@ -7617,10 +7702,26 @@ CodeGenModule::getNoLoopCompatibleOrderStatus(const OMPLoopDirective &LD) { return NxSuccess; } +CodeGenModule::NoLoopXteamErr +CodeGenModule::getXteamRedCompatibleThreadLimitStatus( + const OMPLoopDirective &LD) { + const auto *ThreadLimitClause = LD.getSingleClause(); + if (!ThreadLimitClause) + return NxSuccess; + Expr *ThreadLimitExpr = ThreadLimitClause->getThreadLimit(); + clang::Expr::EvalResult Result; + if (ThreadLimitExpr->EvaluateAsInt(Result, getContext())) { + int ThreadLimitEval = Result.Val.getInt().getExtValue(); + // We support thread limit >= 64 + if (ThreadLimitEval > 63) + return NxSuccess; + } + return NxUnsupportedXteamRedThreadLimit; +} + CodeGenModule::NoLoopXteamErr CodeGenModule::getNoLoopCombinedClausesStatus(const OMPExecutableDirective &D) { if (D.hasClausesOfKind() || - D.hasClausesOfKind() || D.hasClausesOfKind() || D.hasClausesOfKind() || D.hasClausesOfKind() || @@ -7641,9 +7742,6 @@ CodeGenModule::NoLoopXteamErr CodeGenModule::getXteamRedCombinedClausesStatus( if (D.hasClausesOfKind() || D.hasClausesOfKind() || D.hasClausesOfKind() || - D.hasClausesOfKind() || - D.hasClausesOfKind() || - D.hasClausesOfKind() || D.hasClausesOfKind() || D.hasClausesOfKind() || D.hasClausesOfKind() || @@ -7653,6 +7751,8 @@ CodeGenModule::NoLoopXteamErr CodeGenModule::getXteamRedCombinedClausesStatus( return NxNotLoopDirective; const OMPLoopDirective &LD = cast(D); NoLoopXteamErr NxStatus = NxSuccess; + if ((NxStatus = getXteamRedCompatibleThreadLimitStatus(LD))) + return NxStatus; if ((NxStatus = getNoLoopCompatibleOrderStatus(LD))) return NxStatus; return getNoLoopCompatibleSchedStatus(LD); @@ -7776,7 +7876,8 @@ CodeGenModule::NoLoopXteamErr CodeGenModule::checkAndSetNoLoopTargetConstruct( CodeGenModule::NoLoopXteamErr CodeGenModule::checkAndSetNoLoopKernel(const OMPExecutableDirective &D) { NoLoopXteamErr NxStatus = NxSuccess; - if (!getLangOpts().OpenMPTargetIgnoreEnvVars) + if (!getLangOpts().OpenMPTargetIgnoreEnvVars || + !getLangOpts().OpenMPNoNestedParallelism) return NxOptionDisabled; if (D.getDirectiveKind() != @@ -7807,7 +7908,21 @@ CodeGenModule::checkAndSetNoLoopKernel(const 
OMPExecutableDirective &D) { NoLoopIntermediateStmts IntermediateStmts; // Push top-level directive IntermediateStmts.push_back(&D); - setNoLoopKernel(AssocStmt, IntermediateStmts); + + // Now we should determine whether this qualifies as a NoLoop or a + // BigJumpLoop kernel. BigJumpLoop is enabled whenever NoLoop is + // enabled. If the num_teams clause is specified, BigJumpLoop is + // chosen. If the command line option to force BigJumpLoop is used, + // it is preferred over No-Loop. + if (D.hasClausesOfKind() || + getLangOpts().OpenMPTargetBigJumpLoop) { + const ForStmt *FStmt = getSingleForStmt(AssocStmt); + assert(FStmt && "For stmt cannot be null"); + BigJumpLoopKernels.insert(std::make_pair( + FStmt, BigJumpLoopKernelInfo(getWorkGroupSizeSPMDHelper(D), + IntermediateStmts))); + } else + setNoLoopKernel(AssocStmt, IntermediateStmts); // All checks passed return NxSuccess; @@ -7816,7 +7931,8 @@ CodeGenModule::checkAndSetNoLoopKernel(const OMPExecutableDirective &D) { CodeGenModule::NoLoopXteamErr CodeGenModule::checkAndSetXteamRedKernel(const OMPExecutableDirective &D) { NoLoopXteamErr NxStatus = NxSuccess; - if (!getLangOpts().OpenMPTargetIgnoreEnvVars) + if (!getLangOpts().OpenMPTargetIgnoreEnvVars || + !getLangOpts().OpenMPNoNestedParallelism) return NxOptionDisabled; // Allowing only a combined construct for now @@ -7852,13 +7968,43 @@ CodeGenModule::checkAndSetXteamRedKernel(const OMPExecutableDirective &D) { assert(FStmt && "For stmt cannot be null"); XteamRedKernels.insert(std::make_pair( FStmt, XteamRedKernelInfo(/*ThreadStartIndex=*/nullptr, - /*NumTeams=*/nullptr, IntermediateStmts, + /*NumTeams=*/nullptr, + /*BlockSize=*/0, IntermediateStmts, RedVarMapPair.second))); + // The blocksize has to be computed after adding this kernel to the metadata + // above, since the computation below depends on that metadata. Compute block + // size during device compilation only. + int BlockSize = + getLangOpts().OpenMPIsDevice ? 
computeXteamRedBlockSize(D) : 0; + if (BlockSize > 0) + updateXteamRedKernel(FStmt, BlockSize); + // All checks passed return NxSuccess; } +bool CodeGenModule::isXteamRedKernel(const OMPExecutableDirective &D) { + if (!D.hasAssociatedStmt()) + return false; + const ForStmt *FStmt = getSingleForStmt(D.getAssociatedStmt()); + if (FStmt == nullptr) + return false; + return isXteamRedKernel(FStmt); +} + +int CodeGenModule::getXteamRedBlockSize(const ForStmt *FStmt) { + assert(XteamRedKernels.find(FStmt) != XteamRedKernels.end() && + "Metadata missing for Xteam kernel"); + return XteamRedKernels.find(FStmt)->second.BlockSize; +} + +int CodeGenModule::getXteamRedBlockSize(const OMPExecutableDirective &D) { + assert(isXteamRedKernel(D) && "Expected an Xteam reduction kernel"); + const ForStmt *FStmt = getSingleForStmt(D.getAssociatedStmt()); + return getXteamRedBlockSize(FStmt); +} + void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { assert(DeferredDeclsToEmit.empty() && "Should have emitted all decls deferred to emit."); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 04b9853989384..1bb0669e53a77 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -301,6 +301,7 @@ class CodeGenModule : public CodeGenTypeCache { enum NoLoopXteamErr { NxSuccess, + NxNonSPMD, NxOptionDisabled, NxUnsupportedDirective, NxUnsupportedSplitDirective, @@ -327,7 +328,8 @@ class CodeGenModule : public CodeGenTypeCache { NxUnsupportedRedOp, NxNoRedVar, NxMultRedVar, - NxUnsupportedRedExpr + NxUnsupportedRedExpr, + NxUnsupportedXteamRedThreadLimit }; /// Top-level and nested OpenMP directives that may use no-loop codegen. @@ -336,6 +338,16 @@ class CodeGenModule : public CodeGenTypeCache { /// Map construct statement to the intermediate ones for no-loop codegen using NoLoopKernelMap = llvm::DenseMap; + struct BigJumpLoopKernelInfo { + BigJumpLoopKernelInfo(int BlkSz, NoLoopIntermediateStmts Stmts) + : BlockSize{BlkSz}, BigJumpLoopIntStmts{Stmts} {} + + int BlockSize; + NoLoopIntermediateStmts BigJumpLoopIntStmts; + }; + using BigJumpLoopKernelMap = + llvm::DenseMap; + /// Map a reduction variable to the corresponding metadata. The metadata /// contains // the reduction expression, the coorresponding Xteam local aggregator var, @@ -349,16 +361,15 @@ class CodeGenModule : public CodeGenTypeCache { size_t ArgPos; }; using XteamRedVarMap = llvm::DenseMap; - // using XteamRedKernelInfo = std::pair; struct XteamRedKernelInfo { - XteamRedKernelInfo(llvm::Value *TSI, llvm::Value *NT, + XteamRedKernelInfo(llvm::Value *TSI, llvm::Value *NT, int BlkSz, NoLoopIntermediateStmts Stmts, XteamRedVarMap RVM) - : ThreadStartIndex{TSI}, NumTeams{NT}, XteamIntStmts{Stmts}, - XteamRedVars{RVM} {} + : ThreadStartIndex{TSI}, NumTeams{NT}, BlockSize{BlkSz}, + XteamIntStmts{Stmts}, XteamRedVars{RVM} {} llvm::Value *ThreadStartIndex; llvm::Value *NumTeams; + int BlockSize; NoLoopIntermediateStmts XteamIntStmts; XteamRedVarMap XteamRedVars; }; @@ -410,6 +421,7 @@ class CodeGenModule : public CodeGenTypeCache { const Stmt *CurrentXteamRedStmt = nullptr; NoLoopKernelMap NoLoopKernels; + BigJumpLoopKernelMap BigJumpLoopKernels; XteamRedKernelMap XteamRedKernels; // A set of references that have only been seen via a weakref so far. This is @@ -1622,6 +1634,9 @@ class CodeGenModule : public CodeGenTypeCache { /// Given the order clause, can No-Loop code be generated? 
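As a rough standalone sketch (not the CodeGenModule API), the block-size policy implemented by getWorkGroupSizeSPMDHelper and computeXteamRedBlockSize above clamps the requested work-group size to the kernel's thread limit and then rounds down to a supported Xteam reduction size. The 256 default and the supported sizes are taken from the diff; the function and parameter names below are invented for illustration:

#include <initializer_list>

// Illustrative sketch, assuming a sanitized default work-group size of 256.
int pickXteamBlockSize(int Requested, int ThreadLimit) {
  if (Requested < 1)
    Requested = 256;         // fall back to the assumed default
  if (Requested > ThreadLimit)
    Requested = ThreadLimit; // honor the (possibly user-lowered) thread limit
  // Round down to one of the supported Xteam block sizes.
  for (int Size : {1024, 512, 256, 128})
    if (Requested >= Size)
      return Size;
  return 64;
}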
NoLoopXteamErr getNoLoopCompatibleOrderStatus(const OMPLoopDirective &LD); + NoLoopXteamErr + getXteamRedCompatibleThreadLimitStatus(const OMPLoopDirective &LD); + /// Helper functions for generating a NoLoop kernel /// For a captured statement, get the single For statement, if it exists, /// otherwise return nullptr. @@ -1661,11 +1676,34 @@ class CodeGenModule : public CodeGenTypeCache { return NoLoopKernels.find(S) != NoLoopKernels.end(); } + /// Given a top-level target construct for BigJumpLoop codegen, get the + /// intermediate OpenMP constructs. + const NoLoopIntermediateStmts &getBigJumpLoopStmts(const Stmt *S) { + assert(isBigJumpLoopKernel(S)); + return BigJumpLoopKernels.find(S)->second.BigJumpLoopIntStmts; + } + + /// Get the cached blocksize to be used for this BigJumpLoop kernel. + int getBigJumpLoopBlockSize(const Stmt *S) { + assert(isBigJumpLoopKernel(S)); + return BigJumpLoopKernels.find(S)->second.BlockSize; + } + + /// Erase BigJumpLoop related metadata for the input statement. + void resetBigJumpLoopKernel(const Stmt *S) { BigJumpLoopKernels.erase(S); } + /// Is a BigJumpLoop kernel generated for the input statement? + bool isBigJumpLoopKernel(const Stmt *S) { + return BigJumpLoopKernels.find(S) != BigJumpLoopKernels.end(); + } + /// If we are able to generate a Xteam reduction kernel for this directive, /// return true, otherwise return false. If successful, metadata for the /// reduction variables are created for subsequent codegen phases to work on. NoLoopXteamErr checkAndSetXteamRedKernel(const OMPExecutableDirective &D); + /// Compute the block size to be used for a kernel + int getWorkGroupSizeSPMDHelper(const OMPExecutableDirective &D); + /// Given a ForStmt for which Xteam codegen will be done, return the /// intermediate statements for a split directive. 
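To make the kernel-selection bookkeeping above concrete, here is a hypothetical pair of OpenMP loops as seen from user code. Per the logic in checkAndSetNoLoopKernel, a target loop with an explicit num_teams clause (or any eligible loop when the -fopenmp-target-big-jump-loop option added elsewhere in this patch is enabled) is recorded in BigJumpLoopKernels, while the plain form stays on the NoLoop path. The directives and clauses below are illustrative only:

void kernels(float *a, int n) {
  // Recorded as a BigJumpLoop kernel: num_teams is specified.
  #pragma omp target teams distribute parallel for num_teams(256) map(tofrom: a[0:n])
  for (int i = 0; i < n; ++i)
    a[i] += 1.0f;

  // Eligible for NoLoop codegen when the relevant options allow it.
  #pragma omp target teams distribute parallel for map(tofrom: a[0:n])
  for (int i = 0; i < n; ++i)
    a[i] += 1.0f;
}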
const NoLoopIntermediateStmts &getXteamRedStmts(const Stmt *S) { @@ -1715,12 +1753,22 @@ class CodeGenModule : public CodeGenTypeCache { KernelInfo.NumTeams = NTeams; } + void updateXteamRedKernel(const Stmt *S, int BlkSz) { + assert(isXteamRedKernel(S)); + XteamRedKernels.find(S)->second.BlockSize = BlkSz; + } + + // Get the already-computed block size used by Xteam reduction + int getXteamRedBlockSize(const ForStmt *FStmt); + int getXteamRedBlockSize(const OMPExecutableDirective &D); + /// Erase spec-red related metadata for the input statement void resetXteamRedKernel(const Stmt *S) { XteamRedKernels.erase(S); } /// Are we generating xteam reduction kernel for the statement bool isXteamRedKernel(const Stmt *S) { return XteamRedKernels.find(S) != XteamRedKernels.end(); } + bool isXteamRedKernel(const OMPExecutableDirective &D); void setCurrentXteamRedStmt(const Stmt *S) { CurrentXteamRedStmt = S; } const Stmt *getCurrentXteamRedStmt() { return CurrentXteamRedStmt; } @@ -1924,6 +1972,9 @@ class CodeGenModule : public CodeGenTypeCache { NoLoopXteamErr getNoLoopForStmtStatus(const OMPExecutableDirective &, const Stmt *); + // Compute the block size used by Xteam reduction + int computeXteamRedBlockSize(const OMPExecutableDirective &D); + /// Top level checker for xteam reduction of the loop NoLoopXteamErr getXteamRedForStmtStatus(const OMPExecutableDirective &, const Stmt *, const XteamRedVarMap &); diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index adb9f8028bccf..babd38bb68155 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -352,7 +353,8 @@ void Compilation::ExecuteJobs(const JobList &Jobs, const Command *Next = nullptr; while (!JS.IsDone(Next)) { if (!Next) { - std::this_thread::yield(); + // sleep, rather than yield so we do not busy wait. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8e2b40ae6b1c2..765d4af90b656 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6007,15 +6007,16 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, !C.getArgs().hasArg(options::OPT__SLASH_Fo)) || CCGenDiagnostics) { StringRef Name = llvm::sys::path::filename(BaseInput); - std::pair Split = Name.split('.'); - SmallString<128> fname(Split.first.str().c_str()); + size_t pos = Name.find_last_of("."); + StringRef PrefixName = Name.substr(0, pos); + SmallString<128> fname(PrefixName.str().c_str()); if (!BoundArch.empty()) { fname += "-"; fname.append(BoundArch); } SmallString<128> TmpName; const char *Suffix = nullptr; - if (Split.second == "a") + if (Name.ends_with(".a")) Suffix = "a"; else Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode()); @@ -6035,11 +6036,11 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, } } else { if (MultipleArchs && !BoundArch.empty()) { - TmpName = GetTemporaryDirectory(Split.first); + TmpName = GetTemporaryDirectory(PrefixName); llvm::sys::path::append(TmpName, - Split.first + "-" + BoundArch + "." + Suffix); + PrefixName + "-" + BoundArch + "." 
+ Suffix); } else { - TmpName = GetTemporaryPath(Split.first, Suffix); + TmpName = GetTemporaryPath(PrefixName, Suffix); } } return C.addTempFile(C.getArgs().MakeArgString(TmpName)); @@ -6123,7 +6124,11 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object); } else { - const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode()); + const char *Suffix = nullptr; + if (BaseName.ends_with(".a")) + Suffix = "a"; + else + Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode()); assert(Suffix && "All types used for output should have a suffix."); std::string::size_type End = std::string::npos; @@ -6179,9 +6184,10 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, // Must share the same path to conflict. if (SameFile) { StringRef Name = llvm::sys::path::filename(BaseInput); - std::pair Split = Name.split('.'); + size_t pos = Name.find_last_of("."); + StringRef PrefixName = Name.substr(0, pos); std::string TmpName = GetTemporaryPath( - Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode())); + PrefixName, types::getTypeTempSuffix(JA.getType(), IsCLMode())); return C.addTempFile(C.getArgs().MakeArgString(TmpName)); } } diff --git a/clang/lib/Driver/ToolChains/AMDFlang.cpp b/clang/lib/Driver/ToolChains/AMDFlang.cpp index 9c8c5aa9582e1..7c4b3d939d050 100644 --- a/clang/lib/Driver/ToolChains/AMDFlang.cpp +++ b/clang/lib/Driver/ToolChains/AMDFlang.cpp @@ -999,6 +999,13 @@ void AMDFlang::ConstructJob(Compilation &C, const JobAction &JA, // Remove "noinline" attriblute LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("183"); LowerCmdArgs.push_back("0x10"); + // Move option 234 flang reductions up to -fopenmp-target-fast + // instructing flang2 to use 32 teams for reduction tuning via opt 234. + if (Args.hasFlag(options::OPT_fopenmp_target_fast, + options::OPT_fno_openmp_target_fast, false)) { + LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("234"); LowerCmdArgs.push_back("32"); + } + // Set a -x flag for second part of Fortran frontend for (Arg *A : Args.filtered(options::OPT_Mx_EQ)) { A->claim(); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 96a1856e57682..dd81c408d006f 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -243,7 +243,8 @@ RocmInstallationDetector::getInstallationPathCandidates() { } // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin - if (ParentName == "llvm") + // Some versions of the aomp package install to /opt/rocm/aomp/bin + if (ParentName == "llvm" || ParentName.startswith("aomp")) ParentDir = llvm::sys::path::parent_path(ParentDir); // Some versions of the aomp package install to /opt/rocm/aomp/bin // and it seems ParentDir is already pointing to correct place. 
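The Driver.cpp change above replaces StringRef::split('.') (which splits at the first dot) with find_last_of('.'), so inputs with dotted base names keep their full stem when temporary output names are formed. A small standalone illustration of the same idea using std::string rather than the LLVM API:

#include <string>

// "pkg.bundle.c" -> "pkg.bundle"; the old first-dot split produced "pkg".
std::string stemOf(const std::string &Name) {
  std::string::size_type Pos = Name.find_last_of('.');
  return Name.substr(0, Pos); // Pos == npos keeps the whole name
}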
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 0eac7869620b1..006fcae426d8b 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -52,7 +52,7 @@ namespace toolchains { class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { protected: const std::map OptionsDefault; - unsigned CodeObjectVersion = 4; + unsigned CodeObjectVersion = 5; Tool *buildLinker() const override; StringRef getOptionDefault(options::ID OptID) const { auto opt = OptionsDefault.find(OptID); diff --git a/clang/lib/Driver/ToolChains/AmdOptArgs.cpp b/clang/lib/Driver/ToolChains/AmdOptArgs.cpp index 8f2fff3b4a278..fea33d0dd36c8 100644 --- a/clang/lib/Driver/ToolChains/AmdOptArgs.cpp +++ b/clang/lib/Driver/ToolChains/AmdOptArgs.cpp @@ -43,7 +43,6 @@ static bool hasLlvmAoccOption(const ArgList &Args) { Flags.insert(std::make_pair("-mark-rv-outline", true)); Flags.insert(std::make_pair("-rv-outline", true)); Flags.insert(std::make_pair("-rv-depth", true)); - Flags.insert(std::make_pair("-rv-max-reg-size", true)); Flags.insert(std::make_pair("-enable-branch-combine", true)); Flags.insert(std::make_pair("-simplifycfg-no-storesink", true)); Flags.insert(std::make_pair("-inline-aggressive", true)); @@ -278,11 +277,6 @@ static bool checkForPropOpts(const ToolChain &TC, const Driver &D, } ClosedToolChainNeeded = true; } else if ((MArch == "znver2") || (MArch == "znver3")) { - // -rv-max-reg-size=256 around 5% gain on nab - if (!checkOnly) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-rv-max-reg-size=256"); - } ClosedToolChainNeeded = true; } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d3157bdacf501..e763e5f3c2bfd 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -768,8 +768,8 @@ static bool isTargetFastUsed(const ArgList &Args) { options::OPT_fno_openmp_target_fast, isOFastUsed(Args)); } -/// Ignore possibility of runtime environment variables during kernel code -/// generation at -O3 (and above) and -Ofast +/// Ignore possibility of environment variables if either +/// -fopenmp-target-fast or -Ofast is used. 
static bool shouldIgnoreEnvVars(const ArgList &Args) { if (Args.hasFlag(options::OPT_fno_openmp_target_fast, options::OPT_fopenmp_target_fast, false)) @@ -778,29 +778,6 @@ static bool shouldIgnoreEnvVars(const ArgList &Args) { if (isTargetFastUsed(Args)) return true; - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - StringRef S(A->getValue()); - if (S == "s") - return false; - - if (S == "z") - return false; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 2; - } - return false; } @@ -6257,6 +6234,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_gpu_threads_per_team_EQ); + Args.AddAllArgs(CmdArgs, + options::OPT_fopenmp_target_xteam_reduction_blocksize_EQ); if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse, options::OPT_fno_openmp_optimistic_collapse, /*Default=*/false)) @@ -6270,6 +6249,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } else CmdArgs.push_back("-fno-openmp-target-fast"); + if (Args.hasFlag(options::OPT_fopenmp_target_ignore_env_vars, options::OPT_fno_openmp_target_ignore_env_vars, shouldIgnoreEnvVars(Args))) @@ -6277,6 +6257,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else CmdArgs.push_back("-fno-openmp-target-ignore-env-vars"); + if (Args.hasFlag(options::OPT_fopenmp_target_big_jump_loop, + options::OPT_fno_openmp_target_big_jump_loop, false)) + CmdArgs.push_back("-fopenmp-target-big-jump-loop"); + else + CmdArgs.push_back("-fno-openmp-target-big-jump-loop"); + // When in OpenMP offloading mode with NVPTX target, forward // cuda-mode flag if (Args.hasFlag(options::OPT_fopenmp_cuda_mode, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 89a3dd6f8901c..5428d3f42705e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2267,7 +2267,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D, const llvm::opt::ArgList &Args) { - unsigned CodeObjVer = 4; // default + unsigned CodeObjVer = 5; // default if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { if (CodeObjArg->getOption().getID() == options::OPT_mno_code_object_v3_legacy) { diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 7315fd89ef0ec..9f78bdc003264 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -111,7 +111,12 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, const llvm::opt::ArgList &Args) const { // Construct lld command. // The output from ld.lld is an HSA code object file. 
- ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared", + ArgStringList LldArgs{"-flavor", + "gnu", + "-m", + "elf64_amdgpu", + "--no-undefined", + "-shared", "-plugin-opt=-amdgpu-internalize-symbols"}; auto &TC = getToolChain(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0744e8df626cc..48f0bbfcabfb9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3465,6 +3465,11 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, else GenerateArg(Args, OPT_fno_openmp_target_ignore_env_vars, SA); + if (Opts.OpenMPTargetBigJumpLoop) + GenerateArg(Args, OPT_fopenmp_target_big_jump_loop, SA); + else + GenerateArg(Args, OPT_fno_openmp_target_big_jump_loop, SA); + if (Opts.OpenMPThreadSubscription) GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); @@ -3501,6 +3506,10 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fopenmp_gpu_threads_per_team_EQ, Twine(Opts.OpenMPGPUThreadsPerTeam), SA); + if (Opts.OpenMPTargetXteamReductionBlockSize != 1024) + GenerateArg(Args, OPT_fopenmp_target_xteam_reduction_blocksize_EQ, + Twine(Opts.OpenMPTargetXteamReductionBlockSize), SA); + if (!Opts.OMPTargetTriples.empty()) { std::string Targets; llvm::raw_string_ostream OS(Targets); @@ -3909,11 +3918,17 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, getLastArgIntValue(Args, options::OPT_fopenmp_gpu_threads_per_team_EQ, Opts.OpenMPGPUThreadsPerTeam, Diags); - // Turn ON at -O3 (and above) and -Ofast + Opts.OpenMPTargetXteamReductionBlockSize = getLastArgIntValue( + Args, options::OPT_fopenmp_target_xteam_reduction_blocksize_EQ, + Opts.OpenMPTargetXteamReductionBlockSize, Diags); + Opts.OpenMPTargetIgnoreEnvVars = Args.hasFlag(options::OPT_fopenmp_target_ignore_env_vars, - options::OPT_fno_openmp_target_ignore_env_vars, - getOptimizationLevel(Args, IK, Diags) > 2); + options::OPT_fno_openmp_target_ignore_env_vars, false); + + Opts.OpenMPTargetBigJumpLoop = + Args.hasFlag(options::OPT_fopenmp_target_big_jump_loop, + options::OPT_fno_openmp_target_big_jump_loop, false); // Set the value of the debugging flag used in the new offloading device RTL. // Set either by a specific value or to a default if not specified. diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index fb5ec3ab9c273..a6b17fc5ab6e9 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -166,7 +166,6 @@ uint64_t __make_mantissa(const char *__tagp) { } // BEGIN FLOAT -#if defined(__cplusplus) __DEVICE__ int abs(int __x) { int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); @@ -182,7 +181,6 @@ long long llabs(long long __x) { long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); return (__x ^ __sgn) - __sgn; } -#endif __DEVICE__ float acosf(float __x) { return __ocml_acos_f32(__x); } diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index e785c028fdb8b..5ff428c3f3d76 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -17,17 +17,15 @@ #endif // We require std::math functions in the complex builtins below. 
-#ifdef __NVPTX__ #include -#define __CUDA__ + +#ifdef __NVPTX__ #define __OPENMP_NVPTX__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_NVPTX__ #endif // __NVPTX__ #ifdef __AMDGCN__ -#include <__clang_hip_libdevice_declares.h> -#define __ARCHTYPES__ amdgcn #define __OPENMP_AMDGCN__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_AMDGCN__ @@ -47,7 +45,6 @@ // arithmetic and calls to non-complex functions, all of which we can then // handle. #ifndef _LIBCPP_STD_VER -#ifndef _GLIBCXX_COMPLEX #pragma omp begin declare variant match( \ device = {arch(amdgcn, nvptx, nvptx64)}, \ @@ -57,5 +54,4 @@ #pragma omp end declare variant -#endif // _GLIBCXX_COMPLEX #endif // _LIBCPP_STD_VER diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index e34bd8d7bca40..a768c4da504af 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1291,7 +1291,22 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( if (getLangOpts().CUDA) { // In CUDA code, GNU attributes are allowed to appear immediately after the // "[...]", even if there is no "(...)" before the lambda body. - MaybeParseGNUAttributes(D); + // + // Note that we support __noinline__ as a keyword in this mode and thus + // it has to be separately handled. + while (true) { + if (Tok.is(tok::kw___noinline__)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + Attr.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, + ParsedAttr::AS_Keyword); + } else if (Tok.is(tok::kw___attribute)) + ParseGNUAttributes(Attr, nullptr, &D); + else + break; + } + + D.takeAttributes(Attr); } // Helper to emit a warning if we see a CUDA host/device/global attribute diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 6f8a467b9a657..2388fe0393683 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1801,9 +1801,21 @@ void Parser::ParseOMPDeclareTargetClauses( << getOpenMPClauseName(OMPC_indirect) << 0; break; } - bool IsToOrLinkClause = + bool IsToEnterOrLinkClause = OMPDeclareTargetDeclAttr::ConvertStrToMapTypeTy(ClauseName, MT); - assert((!IsDeviceTypeClause || !IsToOrLinkClause) && "Cannot be both!"); + assert((!IsDeviceTypeClause || !IsToEnterOrLinkClause) && + "Cannot be both!"); + + // Starting with OpenMP 5.2 the `to` clause has been replaced by the + // `enter` clause. + if (getLangOpts().OpenMP >= 52 && ClauseName == "to") { + Diag(Tok, diag::err_omp_declare_target_unexpected_to_clause); + break; + } + if (getLangOpts().OpenMP <= 51 && ClauseName == "enter") { + Diag(Tok, diag::err_omp_declare_target_unexpected_enter_clause); + break; + } if (!IsDeviceTypeClause && !IsIndirectClause && DTCI.Kind == OMPD_begin_declare_target) { @@ -1811,16 +1823,18 @@ void Parser::ParseOMPDeclareTargetClauses( << ClauseName << (getLangOpts().OpenMP >= 51 ? 3 : 0); break; } - if (!IsDeviceTypeClause && !IsToOrLinkClause && !IsIndirectClause) { - Diag(Tok, diag::err_omp_declare_target_unexpected_clause) + if (!IsDeviceTypeClause && !IsToEnterOrLinkClause && !IsIndirectClause) { + Diag(Tok, getLangOpts().OpenMP >= 52 + ? diag::err_omp_declare_target_unexpected_clause_52 + : diag::err_omp_declare_target_unexpected_clause) << ClauseName - << (getLangOpts().OpenMP >= 51 ? 4 - : getLangOpts().OpenMP >= 50 ? 2 - : 1); + << (getLangOpts().OpenMP >= 51 + ? 4 + : getLangOpts().OpenMP >= 50 ? 
2 : 1); break; } - if (IsToOrLinkClause || IsIndirectClause) + if (IsToEnterOrLinkClause || IsIndirectClause) HasToOrLinkOrIndirectClause = true; if (IsIndirectClause) { @@ -1884,7 +1898,9 @@ void Parser::ParseOMPDeclareTargetClauses( } if (!HasIdentifier && Tok.isNot(tok::annot_pragma_openmp_end)) { Diag(Tok, - diag::err_omp_declare_target_unexpected_clause_after_implicit_to); + getLangOpts().OpenMP >= 52 + ? diag::err_omp_declare_target_wrong_clause_after_implicit_enter + : diag::err_omp_declare_target_wrong_clause_after_implicit_to); break; } @@ -1899,7 +1915,10 @@ void Parser::ParseOMPDeclareTargetClauses( // For declare target require at least 'to' or 'link' to be present. if (DTCI.Kind == OMPD_declare_target && RequiresToOrLinkOrIndirectClause && !HasToOrLinkOrIndirectClause) - Diag(DTCI.Loc, diag::err_omp_declare_target_missing_to_or_link_clause) + Diag(DTCI.Loc, + getLangOpts().OpenMP >= 52 + ? diag::err_omp_declare_target_missing_enter_or_link_clause + : diag::err_omp_declare_target_missing_to_or_link_clause) << (getLangOpts().OpenMP >= 51 ? 1 : 0); SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch); @@ -3970,7 +3989,8 @@ bool Parser::parseMapTypeModifiers(Sema::OpenMPVarListDataTy &Data) { if (PP.LookAhead(0).is(tok::colon)) return false; Diag(Tok, diag::err_omp_unknown_map_type_modifier) - << (getLangOpts().OpenMP >= 51 ? 1 : 0) + << (getLangOpts().OpenMP >= 51 ? (getLangOpts().OpenMP >= 52 ? 2 : 1) + : 0) << getLangOpts().OpenMPExtensions; ConsumeToken(); } @@ -4159,6 +4179,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, return true; bool HasIterator = false; + bool InvalidIterator = false; bool NeedRParenForLinear = false; BalancedDelimiterTracker LinearT(*this, tok::l_paren, tok::annot_pragma_openmp_end); @@ -4264,6 +4285,23 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, Data.ColonLoc = ConsumeToken(); } } else if (Kind == OMPC_map) { + // Handle optional iterator map modifier. + if (Tok.is(tok::identifier) && PP.getSpelling(Tok) == "iterator") { + HasIterator = true; + EnterScope(Scope::OpenMPDirectiveScope | Scope::DeclScope); + Data.MapTypeModifiers.push_back(OMPC_MAP_MODIFIER_iterator); + Data.MapTypeModifiersLoc.push_back(Tok.getLocation()); + ExprResult IteratorRes = ParseOpenMPIteratorsExpr(); + Data.IteratorExpr = IteratorRes.get(); + // Parse ',' + ExpectAndConsume(tok::comma); + if (getLangOpts().OpenMP < 52) { + Diag(Tok, diag::err_omp_unknown_map_type_modifier) + << (getLangOpts().OpenMP >= 51 ? 1 : 0) + << getLangOpts().OpenMPExtensions; + InvalidIterator = true; + } + } // Handle map type for map clause. 
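A hypothetical OpenMP 5.2 use of the iterator map-type modifier whose parsing is added above; it is accepted only at -fopenmp-version=52 per the new diagnostics, and the identifiers below are illustrative:

void map_rows(double **rows, int nrows, int ncols) {
  // 'it' iterates over the row pointers whose payloads are mapped.
  #pragma omp target map(iterator(it = 0:nrows), to: rows[it][0:ncols])
  {
    // ... device computation over the mapped rows ...
  }
}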
ColonProtectionRAIIObject ColonRAII(*this); @@ -4293,6 +4331,12 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, } if (Data.ExtraModifier == OMPC_MAP_unknown) { Data.ExtraModifier = OMPC_MAP_tofrom; + if (getLangOpts().OpenMP >= 52) { + if (DKind == OMPD_target_enter_data) + Data.ExtraModifier = OMPC_MAP_to; + else if (DKind == OMPD_target_exit_data) + Data.ExtraModifier = OMPC_MAP_from; + } Data.IsMapTypeImplicit = true; } @@ -4455,7 +4499,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, ExitScope(); return (Kind != OMPC_depend && Kind != OMPC_map && Vars.empty()) || (MustHaveTail && !Data.DepModOrTailExpr) || InvalidReductionId || - IsInvalidMapperModifier; + IsInvalidMapperModifier || InvalidIterator; } /// Parsing of OpenMP clause 'private', 'firstprivate', 'lastprivate', diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2493b4a76d5e1..194f425626669 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -351,7 +351,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, // [OpenMP 5.0], 2.19.7.3. declare mapper Directive, Restrictions // List-items in map clauses on this construct may only refer to the declared // variable var and entities that could be referenced by a procedure defined - // at the same location + // at the same location. + // [OpenMP 5.2] Also allow iterator declared variables. if (LangOpts.OpenMP && isa(D) && !isOpenMPDeclareMapperVarDeclAllowed(cast(D))) { Diag(Loc, diag::err_omp_declare_mapper_wrong_var) @@ -5415,6 +5416,10 @@ ExprResult Sema::ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc, } else { CurContext->addDecl(VD); } + + /// Act on the iterator variable declaration. + ActOnOpenMPIteratorVarDecl(VD); + Expr *Begin = D.Range.Begin; if (!IsDeclTyDependent && Begin && !Begin->isTypeDependent()) { ExprResult BeginRes = diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index c093cf71b6e15..942f817d11c57 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -215,6 +215,7 @@ class DSAStackTy { llvm::SmallVector ImplicitDefaultFirstprivateFDs; Expr *DeclareMapperVar = nullptr; + SmallVector IteratorVarDecls; SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name, Scope *CurScope, SourceLocation Loc) : Directive(DKind), DirectiveName(Name), CurScope(CurScope), @@ -1140,6 +1141,22 @@ class DSAStackTy { const SharingMapTy *Top = getTopOfStackOrNull(); return Top ? Top->DeclareMapperVar : nullptr; } + + /// Add a new iterator variable. + void addIteratorVarDecl(VarDecl *VD) { + SharingMapTy &StackElem = getTopOfStack(); + StackElem.IteratorVarDecls.push_back(VD->getCanonicalDecl()); + } + /// Check if variable declaration is an iterator VarDecl. 
+ bool isIteratorVarDecl(const VarDecl *VD) const { + const SharingMapTy *Top = getTopOfStackOrNull(); + if (!Top) + return false; + + return llvm::any_of(Top->IteratorVarDecls, [VD](const VarDecl *IteratorVD) { + return IteratorVD == VD->getCanonicalDecl(); + }); + } /// get captured field from ImplicitDefaultFirstprivateFDs VarDecl *getImplicitFDCapExprDecl(const FieldDecl *FD) const { const_iterator I = begin(); @@ -2702,6 +2719,24 @@ void Sema::finalizeOpenMPDelayedAnalysis(const FunctionDecl *Caller, } if (!LangOpts.OpenMPIsDevice && !LangOpts.OpenMPOffloadMandatory && DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { + // In OpenMP 5.2 or later, if the function has a host variant then allow + // that to be called instead + auto &&HasHostAttr = [](const FunctionDecl *Callee) { + for (OMPDeclareVariantAttr *A : + Callee->specific_attrs()) { + auto *DeclRefVariant = cast(A->getVariantFuncRef()); + auto *VariantFD = cast(DeclRefVariant->getDecl()); + Optional DevTy = + OMPDeclareTargetDeclAttr::getDeviceType( + VariantFD->getMostRecentDecl()); + if (!DevTy || *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + } + return false; + }; + if (getLangOpts().OpenMP >= 52 && + Callee->hasAttr() && HasHostAttr(Callee)) + return; // Diagnose nohost function called during host codegen. StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( OMPC_device_type, OMPC_DEVICE_TYPE_nohost); @@ -6043,7 +6078,7 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, CXXScopeSpec MapperIdScopeSpec; DeclarationNameInfo MapperId; if (OMPClause *NewClause = S.ActOnOpenMPMapClause( - C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), + nullptr, C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), MapperIdScopeSpec, MapperId, C->getMapType(), /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), SubExprs, OMPVarListLocTy())) @@ -6185,8 +6220,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( CXXScopeSpec MapperIdScopeSpec; DeclarationNameInfo MapperId; if (OMPClause *Implicit = ActOnOpenMPMapClause( - OMPC_MAP_MODIFIER_unknown, SourceLocation(), MapperIdScopeSpec, - MapperId, OMPC_MAP_tofrom, + nullptr, OMPC_MAP_MODIFIER_unknown, SourceLocation(), + MapperIdScopeSpec, MapperId, OMPC_MAP_tofrom, /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), Exprs, OMPVarListLocTy(), /*NoDiagnose=*/true)) ClausesWithImplicit.emplace_back(Implicit); @@ -6202,7 +6237,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( DeclarationNameInfo MapperId; auto Kind = static_cast(ClauseKindCnt); if (OMPClause *Implicit = ActOnOpenMPMapClause( - ImplicitMapModifiers[I], ImplicitMapModifiersLoc[I], + nullptr, ImplicitMapModifiers[I], ImplicitMapModifiersLoc[I], MapperIdScopeSpec, MapperId, Kind, /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), ImplicitMap, OMPVarListLocTy())) { @@ -17584,7 +17619,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause(OpenMPClauseKind Kind, assert(0 <= ExtraModifier && ExtraModifier <= OMPC_MAP_unknown && "Unexpected map modifier."); Res = ActOnOpenMPMapClause( - Data.MapTypeModifiers, Data.MapTypeModifiersLoc, + Data.IteratorExpr, Data.MapTypeModifiers, Data.MapTypeModifiersLoc, Data.ReductionOrMapperIdScopeSpec, Data.ReductionOrMapperId, static_cast(ExtraModifier), Data.IsMapTypeImplicit, ExtraModifierLoc, ColonLoc, VarList, Locs); @@ -21638,10 +21673,12 @@ static void checkMappableExpressionList( // target enter data // OpenMP [2.10.2, Restrictions, p. 
99] // A map-type must be specified in all map clauses and must be either - // to or alloc. + // to or alloc. Starting with OpenMP 5.2 the default map type is `to` if + // no map type is present. OpenMPDirectiveKind DKind = DSAS->getCurrentDirective(); if (DKind == OMPD_target_enter_data && - !(MapType == OMPC_MAP_to || MapType == OMPC_MAP_alloc)) { + !(MapType == OMPC_MAP_to || MapType == OMPC_MAP_alloc || + SemaRef.getLangOpts().OpenMP >= 52)) { SemaRef.Diag(StartLoc, diag::err_omp_invalid_map_type_for_directive) << (IsMapTypeImplicit ? 1 : 0) << getOpenMPSimpleClauseTypeName(OMPC_map, MapType) @@ -21652,10 +21689,11 @@ static void checkMappableExpressionList( // target exit_data // OpenMP [2.10.3, Restrictions, p. 102] // A map-type must be specified in all map clauses and must be either - // from, release, or delete. + // from, release, or delete. Starting with OpenMP 5.2 the default map + // type is `from` if no map type is present. if (DKind == OMPD_target_exit_data && !(MapType == OMPC_MAP_from || MapType == OMPC_MAP_release || - MapType == OMPC_MAP_delete)) { + MapType == OMPC_MAP_delete || SemaRef.getLangOpts().OpenMP >= 52)) { SemaRef.Diag(StartLoc, diag::err_omp_invalid_map_type_for_directive) << (IsMapTypeImplicit ? 1 : 0) << getOpenMPSimpleClauseTypeName(OMPC_map, MapType) @@ -21744,7 +21782,7 @@ static void checkMappableExpressionList( } OMPClause *Sema::ActOnOpenMPMapClause( - ArrayRef MapTypeModifiers, + Expr *IteratorModifier, ArrayRef MapTypeModifiers, ArrayRef MapTypeModifiersLoc, CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, @@ -21754,9 +21792,14 @@ OMPClause *Sema::ActOnOpenMPMapClause( OpenMPMapModifierKind Modifiers[] = { OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, - OMPC_MAP_MODIFIER_unknown}; + OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown}; SourceLocation ModifiersLoc[NumberOfOMPMapClauseModifiers]; + if (IteratorModifier && !IteratorModifier->getType()->isSpecificBuiltinType( + BuiltinType::OMPIterator)) + Diag(IteratorModifier->getExprLoc(), + diag::err_omp_map_modifier_not_iterator); + // Process map-type-modifiers, flag errors for duplicate modifiers. unsigned Count = 0; for (unsigned I = 0, E = MapTypeModifiers.size(); I < E; ++I) { @@ -21780,11 +21823,11 @@ OMPClause *Sema::ActOnOpenMPMapClause( // We need to produce a map clause even if we don't have variables so that // other diagnostics related with non-existing map clauses are accurate. 
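The Sema relaxation above admits the OpenMP 5.2 default map types on the data-motion directives; a hypothetical example, with the implicit types shown in comments following the parser change earlier in this patch:

void stage(float *a, int n) {
  #pragma omp target enter data map(a[0:n])  // implicit map type: to
  // ... target regions that use a ...
  #pragma omp target exit data map(a[0:n])   // implicit map type: from
}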
- return OMPMapClause::Create(Context, Locs, MVLI.ProcessedVarList, - MVLI.VarBaseDeclarations, MVLI.VarComponents, - MVLI.UDMapperList, Modifiers, ModifiersLoc, - MapperIdScopeSpec.getWithLocInContext(Context), - MapperId, MapType, IsMapTypeImplicit, MapLoc); + return OMPMapClause::Create( + Context, Locs, MVLI.ProcessedVarList, MVLI.VarBaseDeclarations, + MVLI.VarComponents, MVLI.UDMapperList, IteratorModifier, Modifiers, + ModifiersLoc, MapperIdScopeSpec.getWithLocInContext(Context), MapperId, + MapType, IsMapTypeImplicit, MapLoc); } QualType Sema::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc, @@ -22178,6 +22221,11 @@ Sema::ActOnOpenMPDeclareMapperDirectiveVarDecl(Scope *S, QualType MapperType, return E; } +void Sema::ActOnOpenMPIteratorVarDecl(VarDecl *VD) { + if (DSAStack->getDeclareMapperVarRef()) + DSAStack->addIteratorVarDecl(VD); +} + bool Sema::isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const { assert(LangOpts.OpenMP && "Expected OpenMP mode."); const Expr *Ref = DSAStack->getDeclareMapperVarRef(); @@ -22186,6 +22234,8 @@ bool Sema::isOpenMPDeclareMapperVarDeclAllowed(const VarDecl *VD) const { return true; if (VD->isUsableInConstantExpressions(Context)) return true; + if (LangOpts.OpenMP >= 52 && DSAStack->isIteratorVarDecl(VD)) + return true; return false; } return true; @@ -22664,7 +22714,8 @@ static void checkDeclInTargetContext(SourceLocation SL, SourceRange SR, (SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true) || SemaRef.getCurBlock() || SemaRef.getCurCapturedRegion()) && VD->hasGlobalStorage()) { - if (!MapTy || *MapTy != OMPDeclareTargetDeclAttr::MT_To) { + if (!MapTy || (*MapTy != OMPDeclareTargetDeclAttr::MT_To && + *MapTy != OMPDeclareTargetDeclAttr::MT_Enter)) { // OpenMP 5.0, 2.12.7 declare target Directive, Restrictions // If a lambda declaration and definition appears between a // declare target directive and the matching end declare target @@ -22745,8 +22796,11 @@ void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, IsIndirect = true; } auto *A = OMPDeclareTargetDeclAttr::CreateImplicit( - Context, OMPDeclareTargetDeclAttr::MT_To, DTCI.DT, IndirectE, - IsIndirect, Level, SourceRange(DTCI.Loc, DTCI.Loc)); + Context, + getLangOpts().OpenMP >= 52 ? 
OMPDeclareTargetDeclAttr::MT_Enter + : OMPDeclareTargetDeclAttr::MT_To, + DTCI.DT, IndirectE, IsIndirect, Level, + SourceRange(DTCI.Loc, DTCI.Loc)); D->addAttr(A); if (ASTMutationListener *ML = Context.getASTMutationListener()) ML->DeclarationMarkedOpenMPDeclareTarget(D, A); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index de2bb7734bc99..baa29bcbae718 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3680,9 +3680,10 @@ TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(), OldC->getEndLoc()); OMPClause *NewC = SemaRef.ActOnOpenMPMapClause( - OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS, - NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(), - OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs); + OldC->getIteratorModifier(), OldC->getMapTypeModifiers(), + OldC->getMapTypeModifiersLoc(), SS, NewNameInfo, OldC->getMapType(), + OldC->isImplicitMapType(), OldC->getMapLoc(), OldC->getColonLoc(), + NewVars, Locs); Clauses.push_back(NewC); } SemaRef.EndOpenMPDSABlock(nullptr); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index f0d3a5ca089a3..358f95fed7f60 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1988,15 +1988,16 @@ class TreeTransform { /// By default, performs semantic analysis to build the new OpenMP clause. /// Subclasses may override this routine to provide different behavior. OMPClause *RebuildOMPMapClause( - ArrayRef MapTypeModifiers, + Expr *IteratorModifier, ArrayRef MapTypeModifiers, ArrayRef MapTypeModifiersLoc, CXXScopeSpec MapperIdScopeSpec, DeclarationNameInfo MapperId, OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { return getSema().ActOnOpenMPMapClause( - MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId, - MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs, + IteratorModifier, MapTypeModifiers, MapTypeModifiersLoc, + MapperIdScopeSpec, MapperId, MapType, IsMapTypeImplicit, MapLoc, + ColonLoc, VarList, Locs, /*NoDiagnose=*/false, UnresolvedMappers); } @@ -10227,6 +10228,13 @@ template OMPClause *TreeTransform::TransformOMPMapClause(OMPMapClause *C) { OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); llvm::SmallVector Vars; + Expr *IteratorModifier = C->getIteratorModifier(); + if (IteratorModifier) { + ExprResult MapModRes = getDerived().TransformExpr(IteratorModifier); + if (MapModRes.isInvalid()) + return nullptr; + IteratorModifier = MapModRes.get(); + } CXXScopeSpec MapperIdScopeSpec; DeclarationNameInfo MapperIdInfo; llvm::SmallVector UnresolvedMappers; @@ -10234,9 +10242,9 @@ OMPClause *TreeTransform::TransformOMPMapClause(OMPMapClause *C) { *this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers)) return nullptr; return getDerived().RebuildOMPMapClause( - C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), MapperIdScopeSpec, - MapperIdInfo, C->getMapType(), C->isImplicitMapType(), C->getMapLoc(), - C->getColonLoc(), Vars, Locs, UnresolvedMappers); + IteratorModifier, C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), + MapperIdScopeSpec, MapperIdInfo, C->getMapType(), C->isImplicitMapType(), + C->getMapLoc(), C->getColonLoc(), Vars, Locs, UnresolvedMappers); } 
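Finally, a hypothetical OpenMP 5.2 snippet for the declare target 'enter' handling added in the parser and Sema changes above; at -fopenmp-version=52 the 'enter' spelling is required ('to' is rejected with the new diagnostic), and implicitly marked declarations now receive the MT_Enter map type:

int device_counter = 0;
#pragma omp declare target enter(device_counter)

void bump() {
  #pragma omp target
  { device_counter += 1; }
}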
template diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 954f8ccebb82e..c43a885f88f47 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -10675,10 +10675,13 @@ void OMPClauseReader::VisitOMPDeviceClause(OMPDeviceClause *C) { void OMPClauseReader::VisitOMPMapClause(OMPMapClause *C) { C->setLParenLoc(Record.readSourceLocation()); + bool HasIteratorModifier = false; for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) { C->setMapTypeModifier( I, static_cast(Record.readInt())); C->setMapTypeModifierLoc(I, Record.readSourceLocation()); + if (C->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_iterator) + HasIteratorModifier = true; } C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc()); C->setMapperIdInfo(Record.readDeclarationNameInfo()); @@ -10703,6 +10706,9 @@ void OMPClauseReader::VisitOMPMapClause(OMPMapClause *C) { UDMappers.push_back(Record.readExpr()); C->setUDMapperRefs(UDMappers); + if (HasIteratorModifier) + C->setIteratorModifier(Record.readExpr()); + SmallVector Decls; Decls.reserve(UniqueDecls); for (unsigned i = 0; i < UniqueDecls; ++i) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b70eb9526e19b..e740006ca8fc5 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6617,9 +6617,12 @@ void OMPClauseWriter::VisitOMPMapClause(OMPMapClause *C) { Record.push_back(C->getTotalComponentListNum()); Record.push_back(C->getTotalComponentsNum()); Record.AddSourceLocation(C->getLParenLoc()); + bool HasIteratorModifier = false; for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) { Record.push_back(C->getMapTypeModifier(I)); Record.AddSourceLocation(C->getMapTypeModifierLoc(I)); + if (C->getMapTypeModifier(I) == OMPC_MAP_MODIFIER_iterator) + HasIteratorModifier = true; } Record.AddNestedNameSpecifierLoc(C->getMapperQualifierLoc()); Record.AddDeclarationNameInfo(C->getMapperIdInfo()); @@ -6630,6 +6633,8 @@ void OMPClauseWriter::VisitOMPMapClause(OMPMapClause *C) { Record.AddStmt(E); for (auto *E : C->mapperlists()) Record.AddStmt(E); + if (HasIteratorModifier) + Record.AddStmt(C->getIteratorModifier()); for (auto *D : C->all_decls()) Record.AddDeclRef(D); for (auto N : C->all_num_lists()) diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu index 16505b34c4a6e..62ccc2bd4d05d 100644 --- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu +++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu @@ -1,7 +1,7 @@ // Create module flag for code object version. 
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -o - %s | FileCheck %s -check-prefix=V4 +// RUN: -o - %s | FileCheck %s -check-prefix=V5 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ // RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu index 4d788e6807ab2..847be23ba8e48 100644 --- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu +++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefix=PRECOV5 %s diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu index 2278c26f0bcfd..f996aa24e470b 100644 --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -195,7 +195,7 @@ __device__ void func(float *x); // CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) // CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_Z4funcPf(float* noundef [[TMP5]]) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: call void @_Z4funcPf(float* [[TMP5]]) #[[ATTR8:[0-9]+]] // CHECK-NEXT: ret void // __global__ void test_ds_fmin_func(float src, float *__restrict shared) { diff --git a/clang/test/CodeGenCUDA/lambda-noinline.cu b/clang/test/CodeGenCUDA/lambda-noinline.cu new file mode 100644 index 0000000000000..de2196e63f074 --- /dev/null +++ b/clang/test/CodeGenCUDA/lambda-noinline.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Checks noinline is correctly added to the lambda function. 
+ +// HOST: define{{.*}}@_ZZ4HostvENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// HOST: attributes #[[ATTR]]{{.*}}noinline + +// DEV: define{{.*}}@_ZZ6DevicevENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// DEV: attributes #[[ATTR]]{{.*}}noinline + +__device__ int a; +int b; + +__device__ int Device() { return ([&] __device__ __noinline__ (){ return a; })(); } + +__host__ int Host() { return ([&] __host__ __noinline__ (){ return b; })(); } diff --git a/clang/test/CodeGenCUDA/lambda.cu b/clang/test/CodeGenCUDA/lambda.cu index c2012dc963558..01895d50b6810 100644 --- a/clang/test/CodeGenCUDA/lambda.cu +++ b/clang/test/CodeGenCUDA/lambda.cu @@ -51,8 +51,8 @@ // DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_ // DEV: call void @_ZZ12test_resolvevENKUlvE_clEv // DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv -// DEV: call noundef i32 @_Z10overloadedIiET_v -// DEV-LABEL: define linkonce_odr noundef i32 @_Z10overloadedIiET_v +// DEV: call i32 @_Z10overloadedIiET_v +// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v // DEV: ret i32 1 __device__ int a; diff --git a/clang/test/CodeGenCUDA/unnamed-types.cu b/clang/test/CodeGenCUDA/unnamed-types.cu index 6849df5a184ba..b59d5f448dde2 100644 --- a/clang/test/CodeGenCUDA/unnamed-types.cu +++ b/clang/test/CodeGenCUDA/unnamed-types.cu @@ -19,16 +19,16 @@ __device__ float d1(float x) { } // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_( -// DEVICE: define internal noundef float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf( +// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf( template __global__ void k0(float *p, F f) { p[0] = f(p[0]) + d0(p[1]) + d1(p[2]); } // DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_( -// DEVICE: define internal noundef float @_ZZ2f1PfENKUlfE_clEf( -// DEVICE: define internal noundef float @_ZZ2f1PfENKUlffE_clEff( -// DEVICE: define internal noundef float @_ZZ2f1PfENKUlfE0_clEf( +// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf( +// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff( +// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf( template __global__ void k1(float *p, F0 f0, F1 f1, F2 f2) { p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]); diff --git a/clang/test/CodeGenHIP/debug-info-amdgcn-abi-heterogeneous-dwarf.hip b/clang/test/CodeGenHIP/debug-info-amdgcn-abi-heterogeneous-dwarf.hip index bdcc1da781d69..4edbbdef72391 100644 --- a/clang/test/CodeGenHIP/debug-info-amdgcn-abi-heterogeneous-dwarf.hip +++ b/clang/test/CodeGenHIP/debug-info-amdgcn-abi-heterogeneous-dwarf.hip @@ -169,7 +169,7 @@ __device__ void Test_Func_StructTrivialCopyNoMove(StructTrivialCopyNoMove) {} // CHECK-NOT: {{.*}}memcpy{{.*}} // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %[[#ALLOCA]]), __global__ void Test_Kern_StructTrivialCopyNoMove(StructTrivialCopyNoMove) {} -// CHECK: define dso_local void @_Z28Test_Func_StructNoCopyNoMove18StructNoCopyNoMove(ptr addrspace(5) noundef %[[#ARG:]]) +// CHECK: define dso_local void @_Z28Test_Func_StructNoCopyNoMove18StructNoCopyNoMove(ptr addrspace(5) %[[#ARG:]]) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %[[#ARG]]), __device__ void Test_Func_StructNoCopyNoMove(StructNoCopyNoMove) {} // CHECK: define dso_local amdgpu_kernel void @_Z28Test_Kern_StructNoCopyNoMove18StructNoCopyNoMove(i8 %.coerce) @@ -277,7 +277,7 @@ __device__ void Test_Func_Struct9Bytes(StructNBytes<9>) {} // CHECK: call void @llvm.memcpy.p0.p4.i64(ptr 
align 1 %{{.+}}, ptr addrspace(4) align 1 %{{.+}}, i64 9, i1 false) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %coerce), __global__ void Test_Kern_Struct9Bytes(StructNBytes<9>) {} -// CHECK: define dso_local void @_Z23Test_Func_Struct64Bytes12StructNBytesILj64EE(ptr addrspace(5) noundef byval(%struct.StructNBytes.7) align 1 %0) +// CHECK: define dso_local void @_Z23Test_Func_Struct64Bytes12StructNBytesILj64EE(ptr addrspace(5) byval(%struct.StructNBytes.7) align 1 %0) // CHECK-NOT: alloca // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %0), __device__ void Test_Func_Struct64Bytes(StructNBytes<64>) {} @@ -287,196 +287,196 @@ __device__ void Test_Func_Struct64Bytes(StructNBytes<64>) {} // CHECK: call void @llvm.memcpy.p0.p4.i64(ptr align 1 %{{.+}}, ptr addrspace(4) align 1 %{{.+}}, i64 64, i1 false) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %coerce), __global__ void Test_Kern_Struct64Bytes(StructNBytes<64>) {} -// CHECK: define dso_local void @_Z15Test_Func_Int8Tc(i8 noundef signext %0) +// CHECK: define dso_local void @_Z15Test_Func_Int8Tc(i8 signext %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Int8T(int8_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z15Test_Kern_Int8Tc(i8 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z15Test_Kern_Int8Tc(i8 %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_Int8T(int8_t) {} -// CHECK: define dso_local void @_Z16Test_Func_UInt8Th(i8 noundef zeroext %0) +// CHECK: define dso_local void @_Z16Test_Func_UInt8Th(i8 zeroext %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_UInt8T(uint8_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_UInt8Th(i8 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_UInt8Th(i8 %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_UInt8T(uint8_t) {} -// CHECK: define dso_local void @_Z16Test_Func_Int16Ts(i16 noundef signext %0) +// CHECK: define dso_local void @_Z16Test_Func_Int16Ts(i16 signext %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Int16T(int16_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int16Ts(i16 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int16Ts(i16 %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_Int16T(int16_t) {} -// CHECK: define dso_local void @_Z17Test_Func_UInt16Tt(i16 noundef zeroext %0) +// CHECK: define dso_local void @_Z17Test_Func_UInt16Tt(i16 zeroext %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) 
%.addr), __device__ void Test_Func_UInt16T(uint16_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt16Tt(i16 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt16Tt(i16 %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_UInt16T(uint16_t) {} -// CHECK: define dso_local void @_Z16Test_Func_Int32Ti(i32 noundef %0) +// CHECK: define dso_local void @_Z16Test_Func_Int32Ti(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Int32T(int32_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int32Ti(i32 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int32Ti(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_Int32T(int32_t) {} -// CHECK: define dso_local void @_Z17Test_Func_UInt32Tj(i32 noundef %0) +// CHECK: define dso_local void @_Z17Test_Func_UInt32Tj(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_UInt32T(uint32_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt32Tj(i32 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt32Tj(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_UInt32T(uint32_t) {} -// CHECK: define dso_local void @_Z16Test_Func_Int64Tl(i64 noundef %0) +// CHECK: define dso_local void @_Z16Test_Func_Int64Tl(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Int64T(int64_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int64Tl(i64 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z16Test_Kern_Int64Tl(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_Int64T(int64_t) {} -// CHECK: define dso_local void @_Z17Test_Func_UInt64Tm(i64 noundef %0) +// CHECK: define dso_local void @_Z17Test_Func_UInt64Tm(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_UInt64T(uint64_t) {} -// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt64Tm(i64 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_UInt64Tm(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_UInt64T(uint64_t) {} -// CHECK: define dso_local void @_Z19Test_Func_EnumInt8T9EnumInt8T(i8 noundef signext %0) +// CHECK: define dso_local void @_Z19Test_Func_EnumInt8T9EnumInt8T(i8 signext %0) // CHECK: %.addr = 
alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumInt8T(EnumInt8T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z19Test_Kern_EnumInt8T9EnumInt8T(i8 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z19Test_Kern_EnumInt8T9EnumInt8T(i8 %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumInt8T(EnumInt8T) {} -// CHECK: define dso_local void @_Z20Test_Func_EnumUInt8T10EnumUInt8T(i8 noundef zeroext %0) +// CHECK: define dso_local void @_Z20Test_Func_EnumUInt8T10EnumUInt8T(i8 zeroext %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumUInt8T(EnumUInt8T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumUInt8T10EnumUInt8T(i8 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumUInt8T10EnumUInt8T(i8 %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: store i8 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumUInt8T(EnumUInt8T) {} -// CHECK: define dso_local void @_Z20Test_Func_EnumInt16T10EnumInt16T(i16 noundef signext %0) +// CHECK: define dso_local void @_Z20Test_Func_EnumInt16T10EnumInt16T(i16 signext %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumInt16T(EnumInt16T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt16T10EnumInt16T(i16 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt16T10EnumInt16T(i16 %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumInt16T(EnumInt16T) {} -// CHECK: define dso_local void @_Z21Test_Func_EnumUInt16T11EnumUInt16T(i16 noundef zeroext %0) +// CHECK: define dso_local void @_Z21Test_Func_EnumUInt16T11EnumUInt16T(i16 zeroext %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumUInt16T(EnumUInt16T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt16T11EnumUInt16T(i16 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt16T11EnumUInt16T(i16 %0) // CHECK: %.addr = alloca i16, align 2, addrspace(5) // CHECK: store i16 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumUInt16T(EnumUInt16T) {} -// CHECK: define dso_local void @_Z20Test_Func_EnumInt32T10EnumInt32T(i32 noundef %0) +// CHECK: define dso_local void @_Z20Test_Func_EnumInt32T10EnumInt32T(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumInt32T(EnumInt32T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt32T10EnumInt32T(i32 noundef %0) +// CHECK: 
define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt32T10EnumInt32T(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumInt32T(EnumInt32T) {} -// CHECK: define dso_local void @_Z21Test_Func_EnumUInt32T11EnumUInt32T(i32 noundef %0) +// CHECK: define dso_local void @_Z21Test_Func_EnumUInt32T11EnumUInt32T(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumUInt32T(EnumUInt32T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt32T11EnumUInt32T(i32 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt32T11EnumUInt32T(i32 %0) // CHECK: %.addr = alloca i32, align 4, addrspace(5) // CHECK: store i32 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumUInt32T(EnumUInt32T) {} -// CHECK: define dso_local void @_Z20Test_Func_EnumInt64T10EnumInt64T(i64 noundef %0) +// CHECK: define dso_local void @_Z20Test_Func_EnumInt64T10EnumInt64T(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumInt64T(EnumInt64T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt64T10EnumInt64T(i64 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z20Test_Kern_EnumInt64T10EnumInt64T(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumInt64T(EnumInt64T) {} -// CHECK: define dso_local void @_Z21Test_Func_EnumUInt64T11EnumUInt64T(i64 noundef %0) +// CHECK: define dso_local void @_Z21Test_Func_EnumUInt64T11EnumUInt64T(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_EnumUInt64T(EnumUInt64T) {} -// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt64T11EnumUInt64T(i64 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z21Test_Kern_EnumUInt64T11EnumUInt64T(i64 %0) // CHECK: %.addr = alloca i64, align 8, addrspace(5) // CHECK: store i64 %0, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_EnumUInt64T(EnumUInt64T) {} -// CHECK: define dso_local void @_Z27Test_Func_PromotableIntegerb(i1 noundef zeroext %0) +// CHECK: define dso_local void @_Z27Test_Func_PromotableIntegerb(i1 zeroext %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: %frombool = zext i1 %0 to i8 // CHECK: store i8 %frombool, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_PromotableInteger(bool) {} -// CHECK: define dso_local amdgpu_kernel void @_Z27Test_Kern_PromotableIntegerb(i1 noundef %0) +// CHECK: define dso_local amdgpu_kernel void @_Z27Test_Kern_PromotableIntegerb(i1 %0) // CHECK: %.addr = alloca i8, align 1, addrspace(5) // CHECK: %frombool = zext i1 %0 to i8 // CHECK: store i8 %frombool, // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) 
%.addr), __global__ void Test_Kern_PromotableInteger(bool) {} -// CHECK: define dso_local void @_Z17Test_Func_PointerPi(ptr noundef %0) +// CHECK: define dso_local void @_Z17Test_Func_PointerPi(ptr %0) // CHECK: %.addr = alloca ptr, align 8, addrspace(5) // CHECK: store ptr %0, ptr %.addr.ascast, align 8 // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Pointer(int32_t *) {} -// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_PointerPi(ptr addrspace(1) noundef %.coerce) +// CHECK: define dso_local amdgpu_kernel void @_Z17Test_Kern_PointerPi(ptr addrspace(1) %.coerce) // CHECK: %.addr = alloca ptr, align 8, addrspace(5) // FIXME: There is a store, load, store sequence through another alloca here, // which I don't understand the intent of // CHECK: store ptr // call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __global__ void Test_Kern_Pointer(int32_t *) {} -// CHECK: define dso_local void @_Z19Test_Func_ReferenceRi(ptr noundef nonnull align 4 dereferenceable(4) %0) +// CHECK: define dso_local void @_Z19Test_Func_ReferenceRi(ptr nonnull align 4 dereferenceable(4) %0) // CHECK: %.addr = alloca ptr, align 8, addrspace(5) // CHECK: store ptr %0, ptr %.addr.ascast, align 8 // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %.addr), __device__ void Test_Func_Reference(int32_t &) {} -// CHECK: define dso_local amdgpu_kernel void @_Z19Test_Kern_ReferenceRi(ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) %.coerce) +// CHECK: define dso_local amdgpu_kernel void @_Z19Test_Kern_ReferenceRi(ptr addrspace(1) nonnull align 4 dereferenceable(4) %.coerce) // CHECK: %.addr = alloca ptr, align 8, addrspace(5) // FIXME: There is a store, load, store sequence through another alloca here, // which I don't understand the intent of @@ -504,23 +504,23 @@ __device__ void Test_Func_StructPointerElements(StructPointerElements) {} // CHECK: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %{{.+}}, ptr addrspace(4) align 8 %{{.+}}, i64 16, i1 false) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %coerce), __global__ void Test_Kern_StructPointerElements(StructPointerElements) {} -// CHECK: define dso_local void @_Z37Test_Func_ParamRegLimitExpandedStructlllllli22StructMultipleElements(i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3, i64 noundef %4, i64 noundef %5, i32 noundef %6, i32 %.coerce0, i64 %.coerce1) +// CHECK: define dso_local void @_Z37Test_Func_ParamRegLimitExpandedStructlllllli22StructMultipleElements(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i32 %6, i32 %.coerce0, i64 %.coerce1) // CHECK: %[[#ALLOCA:]] = alloca %struct.StructMultipleElements, align 8, addrspace(5) // CHECK: store i32 %.coerce0, // CHECK: store i64 %.coerce1, // CHECK-NOT: {{.*}}memcpy{{.*}} // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %[[#ALLOCA]]), __device__ void Test_Func_ParamRegLimitExpandedStruct(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int32_t, StructMultipleElements) {} -// CHECK: define dso_local amdgpu_kernel void @_Z37Test_Kern_ParamRegLimitExpandedStructlllllli22StructMultipleElements(i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3, i64 noundef %4, i64 noundef %5, i32 noundef %6, ptr addrspace(4) byref(%struct.StructMultipleElements) align 8 %7) +// CHECK: define dso_local amdgpu_kernel void 
@_Z37Test_Kern_ParamRegLimitExpandedStructlllllli22StructMultipleElements(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i32 %6, ptr addrspace(4) byref(%struct.StructMultipleElements) align 8 %7) // CHECK: %coerce = alloca %struct.StructMultipleElements, align 8, addrspace(5) // CHECK: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %{{.+}}, ptr addrspace(4) align 8 %{{.+}}, i64 16, i1 false) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %coerce), __global__ void Test_Kern_ParamRegLimitExpandedStruct(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int32_t, StructMultipleElements) {} -// CHECK: define dso_local void @_Z39Test_Func_ParamRegLimitUnexpandedStructlllllll22StructMultipleElements(i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3, i64 noundef %4, i64 noundef %5, i64 noundef %6, ptr addrspace(5) noundef byval(%struct.StructMultipleElements) align 8 %7) +// CHECK: define dso_local void @_Z39Test_Func_ParamRegLimitUnexpandedStructlllllll22StructMultipleElements(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, ptr addrspace(5) byval(%struct.StructMultipleElements) align 8 %7) // CHECK-NOT: {{.*}}memcpy{{.*}} // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %7), __device__ void Test_Func_ParamRegLimitUnexpandedStruct(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, StructMultipleElements) {} -// CHECK: define dso_local amdgpu_kernel void @_Z39Test_Kern_ParamRegLimitUnexpandedStructlllllll22StructMultipleElements(i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3, i64 noundef %4, i64 noundef %5, i64 noundef %6, ptr addrspace(4) byref(%struct.StructMultipleElements) align 8 %7) +// CHECK: define dso_local amdgpu_kernel void @_Z39Test_Kern_ParamRegLimitUnexpandedStructlllllll22StructMultipleElements(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, ptr addrspace(4) byref(%struct.StructMultipleElements) align 8 %7) // CHECK: %coerce = alloca %struct.StructMultipleElements, align 8, addrspace(5) // CHECK: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %{{.+}}, ptr addrspace(4) align 8 %{{.+}}, i64 16, i1 false) // CHECK: call void @llvm.dbg.def(metadata !{{[0-9]+}}, metadata ptr addrspace(5) %coerce), diff --git a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp index 486e1606e7ba3..72071973a81de 100644 --- a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp +++ b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp @@ -25,30 +25,30 @@ __device__ struct foo_t { // Check literals are placed in address space 1 (CrossWorkGroup/__global). 
// CHECK: @.str ={{.*}} unnamed_addr addrspace(1) constant -// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z3barPi(i32 addrspace(4)* +// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z3barPi(i32 addrspace(4)* __device__ int* bar(int *x) { return x; } -// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_dv() +// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_dv() __device__ int* baz_d() { // CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @d to i32 addrspace(4)* return &d; } -// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_cv() +// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_cv() __device__ int* baz_c() { // CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @c to i32 addrspace(4)* return &c; } -// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_sv() +// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_sv() __device__ int* baz_s() { // CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @s to i32 addrspace(4)* return &s; } -// CHECK: define{{.*}} spir_func noundef i8 addrspace(4)* @_Z3quzv() +// CHECK: define{{.*}} spir_func i8 addrspace(4)* @_Z3quzv() __device__ const char* quz() { return "abc"; } diff --git a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip index afa461f909529..b5cbbd52497ea 100644 --- a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip +++ b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -9,11 +9,11 @@ // CHECK-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(5)* [[TMP2:%.*]] to i32* // CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3:%.*]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = freeze i32 [[TMP5:%.*]] -// CHECK-NEXT: %call = call noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP6:%.*]], i32 noundef 64, i32 noundef 0) #4 +// CHECK-NEXT: %call = call i32 @_Z11__shfl_synciii(i32 [[TMP6:%.*]], i32 64, i32 0) #4 // CHECK-NEXT: store i32 %call, i32* [[TMP4:%.*]], align 4 // CHECK-NEXT: ret void -// CHECK: define linkonce_odr noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]]) +// CHECK: define linkonce_odr i32 @_Z11__shfl_synciii(i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) #define __global__ __attribute__((global)) #define __device__ __attribute__((device)) diff --git a/clang/test/CodeGenHIP/noundef-attr-verify.hip b/clang/test/CodeGenHIP/noundef-attr-verify.hip new file mode 100644 index 0000000000000..985f7a773a8c5 --- /dev/null +++ b/clang/test/CodeGenHIP/noundef-attr-verify.hip @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \ +// RUN: -o - | FileCheck %s + +#define __global__ __attribute__((global)) +#define __device__ __attribute__((device)) +#define WARP_SIZE 64 + +static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; + +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} + +__device__ +inline +int __shfl(int var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} + +template +static __device__ +T __shfl_sync(unsigned mask, T val, int src_line, int width=WARP_SIZE) +{ + return __shfl(val, src_line, width); +} + +// CHECK-LABEL: @_Z13shufflekernelv( +// CHECK: call i32 @_ZL11__shfl_syncIiET_jS0_ii(i32 64, i32 %0, i32 0, i32 64) + 
+__global__ void +shufflekernel() +{ + int res, t; + res = __shfl_sync(WARP_SIZE, t, 0); +} diff --git a/clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip b/clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip new file mode 100644 index 0000000000000..c071d197b336b --- /dev/null +++ b/clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip @@ -0,0 +1,21 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -munsafe-fp-atomics -target-cpu gfx90a -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) +#define __device__ __attribute__((device)) + +// CHECK-LABEL: @_Z15unsafeAtomicAddPff(ptr %addr, float %value +__device__ inline float unsafeAtomicAdd(float* addr, float value) { + // CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5) + // CHECK: %[[ADDR_ADDR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr + // CHECK: %[[ADDR_PTR:.*]] = load ptr, ptr %[[ADDR_ADDR_ASCAST]], align 8 + // CHECK: %[[ADDR:.*]] = addrspacecast ptr %[[ADDR_PTR]] to ptr addrspace(3) + // CHECK: call contract float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %[[ADDR]] + return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value); +} + +__global__ void test_global_atomic_add_f32(float *val){ + float *rtn; + *rtn = unsafeAtomicAdd(val, 1.0); +} diff --git a/clang/test/CodeGenOpenCL/amdgpu-debug-info-struct-function-arg.cl b/clang/test/CodeGenOpenCL/amdgpu-debug-info-struct-function-arg.cl new file mode 100644 index 0000000000000..0f8764ad30c13 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-debug-info-struct-function-arg.cl @@ -0,0 +1,36 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang -g -target amdgcn-amd-amdhsa -march=gfx900 -O0 -nogpulib %s -c -o - | llvm-dwarfdump -v -debug-info - | FileCheck "%s" +// CHECK: DW_TAG_subprogram +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "foo") +// +// CHECK: DW_TAG_formal_parameter +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "data") +// CHECK: DW_AT_type [DW_FORM_ref4] +// CHECK-SAME: (cu + 0x{{[0-9a-f]+}} => {0x[[BAR_OFFSET:[0-9a-f]+]]} "bar") +// +// CHECK: DW_TAG_variable +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "offset") +// +// CHECK: 0x[[BAR_OFFSET]]: DW_TAG_structure_type +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "bar") +// +// CHECK: DW_TAG_member +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "C") +// +// CHECK: DW_TAG_member +// CHECK: DW_AT_name [DW_FORM_strx1] +// CHECK-SAME: (indexed ({{[0-9a-f]+}}) string = "A") +struct bar { + __global unsigned *C; + __global unsigned *A; +}; + +void foo(struct bar data) { + unsigned offset = get_global_id(0); + data.C[offset] = data.A[offset]; +} diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index ff288e530d17f..8106788727b8b 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -76,9 +76,9 @@ // GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1035: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" +// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" +// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" +// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" +// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" kernel void test() {} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl index 068ecb1ee444c..dc7069decaaa6 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl @@ -14,14 +14,10 @@ typedef unsigned short __attribute__((ext_vector_type(2))) ushort2; // CHECK: call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i16 %sC) // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 false) // CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 true) -// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 false) -// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 true) // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false) // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true) // CHECK: call i32 @llvm.amdgcn.sudot4(i1 true, i32 %A, i1 false, i32 %B, i32 %C, i1 false) // CHECK: call i32 @llvm.amdgcn.sudot4(i1 false, i32 %A, i1 true, i32 %B, i32 %C, i1 true) -// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 false) -// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 true) // CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 false) // CHECK: 
call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 true) // CHECK: call i32 @llvm.amdgcn.sudot8(i1 false, i32 %A, i1 true, i32 %B, i32 %C, i1 false) @@ -44,18 +40,12 @@ kernel void builtins_amdgcn_dl_insts_err( fOut[3] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, false); fOut[4] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, true); - siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); - siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); - uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); iOut[0] = __builtin_amdgcn_sudot4(true, A, false, B, C, false); iOut[1] = __builtin_amdgcn_sudot4(false, A, true, B, C, true); - siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); - siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); - uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 9696f3536e2f6..4d66bbc574f96 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -583,13 +583,13 @@ void test_get_local_id(int d, global int *out) } // CHECK-LABEL: @test_get_workgroup_size( -// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 4 +// CHECK: call align 8 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 12 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 6 +// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 14 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load -// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 8 -// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load +// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 16 +// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load void test_get_workgroup_size(int d, global int *out) { switch (d) { diff --git a/clang/test/Driver/clang-offload-bundler-asserts-on.c b/clang/test/Driver/clang-offload-bundler-asserts-on.c index 4b14ad310d2e9..5c7b755c83bfa 100644 --- a/clang/test/Driver/clang-offload-bundler-asserts-on.c +++ b/clang/test/Driver/clang-offload-bundler-asserts-on.c @@ -16,13 +16,13 @@ // // Create few code object bundles and archive them to create an input archive // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -output=%t.simple.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+ -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID1.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack- -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID2.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:xnack- 
-inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID3.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+ -input=%t.o -input=%t.tgt1 -input=%t.tgt1 -output=%t.targetID1.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack- -input=%t.o -input=%t.tgt1 -input=%t.tgt1 -output=%t.targetID2.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:xnack- -input=%t.o -input=%t.tgt1 -input=%t.tgt1 -output=%t.targetID3.bundle // RUN: llvm-ar cr %t.input-archive.a %t.simple.bundle %t.targetID1.bundle %t.targetID2.bundle %t.targetID3.bundle // Tests to check compatibility between Bundle Entry ID formats i.e. between presence/absence of extra hyphen in case of missing environment field -// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa-gfx908:sramecc+:xnack+ -inputs=%t.input-archive.a -outputs=%t-archive-gfx906-simple.a,%t-archive-gfx908-simple.a -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLECOMPATIBILITY +// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa-gfx908:sramecc+:xnack+ -input=%t.input-archive.a -output=%t-archive-gfx906-simple.a -output=%t-archive-gfx908-simple.a -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLECOMPATIBILITY // BUNDLECOMPATIBILITY: Compatible: Exact match: [CodeObject: openmp-amdgcn-amd-amdhsa-gfx906] : [Target: openmp-amdgcn-amd-amdhsa--gfx906] // BUNDLECOMPATIBILITY: Incompatible: Processor mismatch [CodeObject: openmp-amdgcn-amd-amdhsa-gfx906] : [Target: openmp-amdgcn-amd-amdhsa-gfx908:sramecc+:xnack+] // BUNDLECOMPATIBILITY: Incompatible: Processor mismatch [CodeObject: openmp-amdgcn-amd-amdhsa--gfx908] : [Target: openmp-amdgcn-amd-amdhsa--gfx906] diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index a86fb5ea2d249..c2f2c3cdb2a6e 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -471,13 +471,13 @@ // // Create few code object bundles and archive them to create an input archive // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa-gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -output=%t.simple.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+ -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID1.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack- -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID2.bundle -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:xnack- -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID3.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+ -input=%t.o -input=%t.tgt1 -input=%t.tgt1 
-output=%t.targetID1.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:sramecc+:xnack- -input=%t.o -input=%t.tgt1 -input=%t.tgt1 -output=%t.targetID2.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906:xnack-,openmp-amdgcn-amd-amdhsa--gfx908:xnack- -input=%t.o -input=%t.tgt1 -input=%t.tgt1 -output=%t.targetID3.bundle // RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906:xnack-,hip-amdgcn-amd-amdhsa--gfx908:xnack- -inputs=%t.o,%t.tgt1,%t.tgt1 -outputs=%t.targetID4.bundle // RUN: llvm-ar cr %t.input-archive.a %t.simple.bundle %t.targetID1.bundle %t.targetID2.bundle %t.targetID3.bundle %t.targetID4.bundle -// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -inputs=%t.input-archive.a -outputs=%t-archive-gfx906-simple.a,%t-archive-gfx908-simple.a +// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -input=%t.input-archive.a -output=%t-archive-gfx906-simple.a -output=%t-archive-gfx908-simple.a // RUN: llvm-ar t %t-archive-gfx906-simple.a | FileCheck %s -check-prefix=GFX906 // RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa-gfx906:xnack+ -input=%t.input-archive.a -output=%t-archive-gfx906-simple.a // RUN: llvm-ar t %t-archive-gfx906-simple.a | FileCheck %s -check-prefix=GFX906 diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index d276f6ea47244..8535705a09694 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -139,13 +139,13 @@ // Test default code object version. 
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5 // Test default code object version with old device library without abi_version_400.bc // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \ // RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4 +// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5 // Test -mcode-object-version=3 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \ diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip index cbc0164069651..9dbb1f21fcc25 100644 --- a/clang/test/Driver/hip-toolchain-device-only.hip +++ b/clang/test/Driver/hip-toolchain-device-only.hip @@ -12,7 +12,7 @@ // CHECK-SAME: "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip" -// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" @@ -21,7 +21,7 @@ // CHECK-SAME: "-target-cpu" "gfx900" // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip" -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index 2cd44ca78eb8d..4ae054b62fb7f 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -59,7 +59,7 @@ // CHECK-NOT: {{".*opt"}} // CHECK-NOT: {{".*llc"}} -// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]] // @@ -82,7 +82,7 @@ // CHECK-NOT: {{".*opt"}} // CHECK-NOT: {{".*llc"}} -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]] // @@ -122,7 +122,7 @@ // CHECK-NOT: {{".*opt"}} // CHECK-NOT: {{".*llc"}} -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]] // @@ -145,7 +145,7 @@ // CHECK-NOT: {{".*opt"}} // CHECK-NOT: {{".*llc"}} -// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared" +// CHECK: [[LLD]] "-flavor" "gnu" "-m" "elf64_amdgpu" "--no-undefined" "-shared" // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]] // diff --git a/clang/test/Driver/openmp-target-fast-flag.c b/clang/test/Driver/openmp-target-fast-flag.c index 83ed0bb757249..f1f4b2862001a 100644 --- a/clang/test/Driver/openmp-target-fast-flag.c +++ b/clang/test/Driver/openmp-target-fast-flag.c @@ -7,7 +7,7 @@ // RUN: | FileCheck -check-prefixes=TFast,EnV,TState,NestParallel %s // RUN: %clang 
-### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O4 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=O4,NoTFast,EnV,NoTState,NoNestParallel %s +// RUN: | FileCheck -check-prefixes=O4,NoTFast,NoEnV,NoTState,NoNestParallel %s // RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O4 -fno-openmp-target-fast %s 2>&1 \ // RUN: | FileCheck -check-prefixes=O4,NoTFast,NoEnV,NoTState,NoNestParallel %s @@ -43,4 +43,4 @@ // NestParallel: -fopenmp-assume-no-nested-parallelism // NestParallel-NOT: -fno-openmp-assume-no-nested-parallelism // NoNestParallel: -fno-openmp-assume-no-nested-parallelism -// NoNestParallel-NOT: -fopenmp-assume-no-nested-parallelism \ No newline at end of file +// NoNestParallel-NOT: -fopenmp-assume-no-nested-parallelism diff --git a/clang/test/Driver/rocm-detect.hip b/clang/test/Driver/rocm-detect.hip index c3ffd21a75b9b..c5f360c5109ac 100644 --- a/clang/test/Driver/rocm-detect.hip +++ b/clang/test/Driver/rocm-detect.hip @@ -82,7 +82,6 @@ // SPACK: ROCm installation search path (Spack 4.0.0): [[DIR:.*]] // SPACK: ROCm installation search path: [[CLANG:.*]] -// SPACK: ROCm installation search path: [[DIR]]/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z // SPACK: ROCm installation search path: [[DIR]]/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/lib/clang // SPACK: ROCm installation search path: /opt/rocm // SPACK: InstalledDir: [[DIR]]/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/bin diff --git a/clang/test/OpenMP/big_jump_loop_codegen.cpp b/clang/test/OpenMP/big_jump_loop_codegen.cpp new file mode 100644 index 0000000000000..b5ae3a8a2c4bf --- /dev/null +++ b/clang/test/OpenMP/big_jump_loop_codegen.cpp @@ -0,0 +1,116 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-ignore-env-vars -fopenmp-assume-no-nested-parallelism -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-ignore-env-vars -fopenmp-assume-no-nested-parallelism -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +T tmain(T argc, T *argv) { + int N = 100; + int v[N]; + #pragma omp target map(iterator(it = 0:N:2), to: v[it]) + foo(); + #pragma omp target map(iterator(it = 0:N:4), from: v[it]) + foo(); + + return 0; +} + +// OMP52: template T tmain(T argc, T *argv) { +// OMP52-NEXT: int N = 100; +// OMP52-NEXT: int v[N]; +// OMP52-NEXT: #pragma omp target map(iterator(int it = 0:N:2),to: v[it]) +// OMP52-NEXT: foo() +// OMP52-NEXT: #pragma omp target map(iterator(int it = 0:N:4),from: v[it]) +// OMP52-NEXT: foo() + +// OMP52-LABEL: int main(int argc, char **argv) { +int main (int argc, char **argv) { + int i, j, a[20], always, close; +// OMP52-NEXT: int i, j, a[20] +#pragma omp target +// OMP52-NEXT: #pragma omp target + foo(); +// OMP52-NEXT: foo(); +#pragma omp target map(iterator(it = 0:20:2), to: a[it]) +// OMP52-NEXT: #pragma omp target 
map(iterator(int it = 0:20:2),to: a[it]) + foo(); +// OMP52-NEXT: foo(); +#pragma omp target map(iterator(it = 0:20:4), from: a[it]) +// OMP52-NEXT: #pragma omp target map(iterator(int it = 0:20:4),from: a[it]) +foo(); +// OMP52-NEXT: foo(); + + return tmain(argc, &argc) + tmain(argv[0][0], argv[0]); +} +#endif // OMP52 + #ifdef OMPX // RUN: %clang_cc1 -DOMPX -verify -fopenmp -fopenmp-extensions -ast-print %s | FileCheck %s --check-prefix=OMPX diff --git a/clang/test/OpenMP/target_enter_data_ast_print.cpp b/clang/test/OpenMP/target_enter_data_ast_print.cpp index 0ccafaef5b59a..b11d5de13de67 100644 --- a/clang/test/OpenMP/target_enter_data_ast_print.cpp +++ b/clang/test/OpenMP/target_enter_data_ast_print.cpp @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s diff --git a/clang/test/OpenMP/target_enter_data_ast_print_openmp52.cpp b/clang/test/OpenMP/target_enter_data_ast_print_openmp52.cpp new file mode 100644 index 0000000000000..578f9a2542744 --- /dev/null +++ b/clang/test/OpenMP/target_enter_data_ast_print_openmp52.cpp @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck --check-prefix=CHECK --check-prefix=CHECK-52 %s + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +template +T tmain(T argc, T *argv) { + T i_def, i; + + i = argc; + +#pragma omp target enter data map(i_def) + +#pragma omp target enter data map(to: i) + + return 0; +} + +// CHECK: template T tmain(T argc, T *argv) { +// CHECK-NEXT: T i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target enter data map(to: i_def){{$}} +// CHECK-NEXT: #pragma omp target enter data map(to: i){{$}} + +// CHECK: template<> int tmain(int argc, int *argv) { +// CHECK-NEXT: int i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target enter data map(to: i_def){{$}} +// CHECK-NEXT: #pragma omp target enter data map(to: i) + +// CHECK: template<> char tmain(char argc, char *argv) { +// CHECK-NEXT: char i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target enter data map(to: i_def){{$}} +// CHECK-NEXT: #pragma omp target enter data map(to: i) + +int main (int argc, char **argv) { + int b_def, b; + static int a_def, a; +// CHECK: static int a_def, a; + +#pragma omp target enter data map(a_def) +// CHECK: #pragma omp target enter data map(to: a_def) + a_def=2; +// CHECK-NEXT: a_def = 2; + +#pragma omp target enter data map(to: a) +// CHECK: #pragma omp target enter data map(to: a) + a=2; +// CHECK-NEXT: a = 2; + +#pragma omp target enter data map(b_def) +// CHECK-NEXT: #pragma omp target enter 
data map(to: b_def) + +#pragma omp target enter data map(to: b) +// CHECK-NEXT: #pragma omp target enter data map(to: b) + + return tmain(argc, &argc) + tmain(argv[0][0], argv[0]); +} + +#endif diff --git a/clang/test/OpenMP/target_exit_data_ast_print.cpp b/clang/test/OpenMP/target_exit_data_ast_print.cpp index 4b3f65b5835fb..f482f379361bd 100644 --- a/clang/test/OpenMP/target_exit_data_ast_print.cpp +++ b/clang/test/OpenMP/target_exit_data_ast_print.cpp @@ -6,6 +6,10 @@ // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=52 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s diff --git a/clang/test/OpenMP/target_exit_data_ast_print_openmp52.cpp b/clang/test/OpenMP/target_exit_data_ast_print_openmp52.cpp new file mode 100644 index 0000000000000..fbc431eadbccb --- /dev/null +++ b/clang/test/OpenMP/target_exit_data_ast_print_openmp52.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=52 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=52 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +template +T tmain(T argc, T *argv) { + T i_def, i; + + i = argc; +#pragma omp target exit data map(i_def) + +#pragma omp target exit data map(from: i) + + return 0; +} + +// CHECK: template T tmain(T argc, T *argv) { +// CHECK-NEXT: T i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target exit data map(from: i_def){{$}} +// CHECK-NEXT: #pragma omp target exit data map(from: i){{$}} + +// CHECK: template<> int tmain(int argc, int *argv) { +// CHECK-NEXT: int i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target exit data map(from: i_def) +// CHECK-NEXT: #pragma omp target exit data map(from: i) + +// CHECK: template<> char tmain(char argc, char *argv) { +// CHECK-NEXT: char i_def, i; +// CHECK-NEXT: i = argc; +// CHECK-NEXT: #pragma omp target exit data map(from: i_def) +// CHECK-NEXT: #pragma omp target exit data map(from: i) + +int main (int argc, char **argv) { + int b_def, b; + static int a_def, a; +// CHECK: static int a_def, a; + +#pragma omp target exit data map(a_def) +// CHECK: #pragma omp target exit data map(from: a_def) + a_def=2; +// CHECK-NEXT: a_def = 2; + +#pragma omp target exit data map(from: a) +// CHECK: #pragma omp target exit data map(from: a) + a=2; +// CHECK-NEXT: a = 2; + +#pragma omp target exit data map(b_def) +// CHECK-NEXT: #pragma omp target exit data map(from: b_def) + +#pragma omp target exit data map(from: b) +// CHECK-NEXT: #pragma omp target exit data map(from: b) + + return tmain(argc, &argc) + tmain(argv[0][0], argv[0]); +} + +#endif diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp index 
ae28a149333fb..703bc9dff80cf 100644 --- a/clang/test/OpenMP/target_map_messages.cpp +++ b/clang/test/OpenMP/target_map_messages.cpp @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -verify=expected,lt50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized // RUN: %clang_cc1 -verify=expected,ge50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized // RUN: %clang_cc1 -verify=expected,ge50,ge51,omp,ge51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=51 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized +// RUN: %clang_cc1 -verify=expected,ge50,ge51,ge52,omp,ge52-omp -fopenmp -fno-openmp-extensions -fopenmp-version=52 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized // RUN: %clang_cc1 -DCCODE -verify -fopenmp -fno-openmp-extensions -ferror-limit 300 -x c %s -Wno-openmp -Wuninitialized // -fopenmp-simd, -fno-openmp-extensions @@ -158,23 +159,28 @@ struct SA { // expected-error@+1 {{use of undeclared identifier 'present'}} #pragma omp target map(present) {} + // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c,f) {} + // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c[1:2],f) {} + // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c,f[1:2]) {} + // ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c[:],f) {} + // ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} @@ -191,11 +197,15 @@ struct SA { // lt51-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(present, present, tofrom: a) {} + // ge52-omp-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} + // 
ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ompx-error@+3 {{same map type modifier has been specified more than once}} // ge51-omp-error@+2 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, ompx_hold, tofrom: a) {} + // ge52-omp-error@+9 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} + // ge52-omp-error@+8 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+7 2 {{same map type modifier has been specified more than once}} // ge51-error@+6 {{same map type modifier has been specified more than once}} // lt51-ompx-error@+5 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} @@ -239,6 +249,36 @@ struct SA { {} #pragma omp target map(([b[I]][bf])f) // lt50-error {{expected ',' or ']' in lambda capture list}} lt50-error {{expected ')'}} lt50-note {{to match this '('}} {} + // ge51-ompx-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator(it=0:10), tofrom:a) + {} + // ompx-error@+8 {{redefinition of 'it'}} + // ompx-note@+7 {{previous definition is here}} + // omp-error@+6 {{redefinition of 'it'}} + // omp-note@+5 {{previous definition is here}} + // ge51-ompx-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator(it=0:10, it=0:20), tofrom:a) + {} + // ge51-ompx-error@+6 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // lt51-error@+4 {{expected '(' after 'iterator'}} + // ge51-error@+3 {{expected '(' after 'iterator'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator, tofrom:a) + {} + // ge51-ompx-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator(), tofrom:a) + {} return; } }; @@ -920,6 
+960,24 @@ int main(int argc, char **argv) { pos(i).y = i+1; } + // ge51-ompx-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator(it=0:10), tofrom:a[it]) + {} + + // ompx-error@+8 {{use of undeclared identifier 'itt'; did you mean 'it'?}} + // ompx-note@+7 {{'it' declared here}} + // omp-error@+6 {{use of undeclared identifier 'itt'; did you mean 'it'?}} + // omp-note@+5 {{'it' declared here}} + // ge51-ompx-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}} + // lt51-ompx-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}} + // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} + // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} + #pragma omp target map(iterator(it=0:10), tofrom:a[itt]) + {} + return tmain(argc)+tmain(argc); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } #endif diff --git a/clang/test/OpenMP/xteam_red_codegen.cpp b/clang/test/OpenMP/xteam_red_codegen.cpp new file mode 100644 index 0000000000000..76951203da303 --- /dev/null +++ b/clang/test/OpenMP/xteam_red_codegen.cpp @@ -0,0 +1,1517 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-ignore-env-vars -fopenmp-assume-no-nested-parallelism -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-target-ignore-env-vars -fopenmp-assume-no-nested-parallelism -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#include + +int main() +{ + int N = 100; + + double a[N], b[N]; + int bint[N]; + unsigned cint[N]; + + int8_t int8_sum = 0; + int16_t int16_sum = 0; + int32_t int32_sum = 0; + uint32_t uint32_sum = 0; + int64_t int64_sum = 0; + uint64_t uint64_sum = 0; + + for (int i=0; i +__attribute__((enable_if(true, ""))) +T kaboom(T a, T b) { + return b; +} + +struct A { + double foo(); +}; + +template +struct B { + A &f; + + void bar() { + kaboom(kaboom(0.0, 1.0), f.foo()); + } +}; diff --git a/clang/tools/clang-hip/clang-build-select-link/ClangBuildSelectLink.cpp b/clang/tools/clang-hip/clang-build-select-link/ClangBuildSelectLink.cpp index 7a2dc9f58f1a6..3eef14ad02004 100644 --- a/clang/tools/clang-hip/clang-build-select-link/ClangBuildSelectLink.cpp +++ b/clang/tools/clang-hip/clang-build-select-link/ClangBuildSelectLink.cpp @@ -188,22 +188,35 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L, static bool 
convertExternsToLinkOnce(Module *MOUT, LLVMContext &Ctx) { // Convert all external functions to LinkOnceODR so they get inlined - // and removed by the optimizer in the next HIP driver step. - // After next opt step, only kernels will exist + // and removed by the optimizer unless optnone is set for (Module::iterator i = MOUT->begin(), e = MOUT->end(); i != e; ++i) { llvm::Function *F = &*i; if (!i->isDeclaration()) { if (Verbose) errs() << "Function attribute cleanup for\'" << F->getName().str().c_str() << "\' \n"; - if (i->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL) { - F->removeFnAttr(llvm::Attribute::OptimizeNone); - } else { + if (i->getCallingConv() != llvm::CallingConv::AMDGPU_KERNEL) { + if (!strncmp(F->getName().str().c_str(), "__ockl_devmem_request", + strlen("__ockl_devmem_request"))) + continue; + if (!strncmp(F->getName().str().c_str(), "__ockl_dm_alloc", + strlen("__ockl_dm_alloc"))) + continue; + if (!strncmp(F->getName().str().c_str(), "__ockl_dm_dealloc", + strlen("__ockl_dm_dealloc"))) + continue; + if (!strncmp(F->getName().str().c_str(), "hostrpc_invoke", + strlen("hostrpc_invoke"))) + continue; + + // all other functions F->setLinkage(GlobalValue::LinkOnceODRLinkage); F->setVisibility(GlobalValue::ProtectedVisibility); - F->removeFnAttr(llvm::Attribute::OptimizeNone); - F->removeFnAttr(llvm::Attribute::NoInline); - F->addFnAttr(llvm::Attribute::AlwaysInline); + if (!F->hasOptNone()) { + F->removeFnAttr(llvm::Attribute::OptimizeNone); + F->removeFnAttr(llvm::Attribute::NoInline); + F->addFnAttr(llvm::Attribute::AlwaysInline); + } } } } diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 8a2d83600c73c..6c9b4e1b64168 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -96,6 +96,7 @@ class ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u8 user_requested_alignment_log : 3; + u8 device_mem : 1; private: u16 user_requested_size_hi; @@ -562,6 +563,7 @@ struct Allocator { uptr chunk_beg = user_beg - kChunkHeaderSize; AsanChunk *m = reinterpret_cast(chunk_beg); m->alloc_type = alloc_type; + m->device_mem = da_info ? 1 : 0; CHECK(size); m->SetUsedSize(size); m->user_requested_alignment_log = user_requested_alignment_log; @@ -617,9 +619,26 @@ struct Allocator { if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { - ReportInvalidFree(ptr, old_chunk_state, stack); - // It's not safe to push a chunk in quarantine on invalid free. - return false; + if (!m->device_mem) { + ReportInvalidFree(ptr, old_chunk_state, stack); + // It's not safe to push a chunk in quarantine on invalid free. + return false; + } else { + // Temporary patch: atomic_compare_exchange_strong will give wrong + // results sometimes for device memory, so just use a mutex to protect + // us from the possible race conditions + // + // We need a mutex, borrow fallback_mutex + SpinMutexLock l(&fallback_mutex); + old_chunk_state = atomic_load(&m->chunk_state, memory_order_relaxed); + if (old_chunk_state == CHUNK_ALLOCATED) { + atomic_store(&m->chunk_state, CHUNK_QUARANTINE, memory_order_relaxed); + } else { + ReportInvalidFree(ptr, old_chunk_state, stack); + // It's not safe to push a chunk in quarantine on invalid free. + return false; + } + } } CHECK_EQ(CHUNK_ALLOCATED, old_chunk_state); // It was a user data. 
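Note on the asan_allocator.cpp hunk above: the new device_mem path works around an atomic compare-exchange that the comment says is unreliable for device memory by falling back to a mutex-guarded check-and-set. A minimal standalone sketch of that pattern, using plain std::atomic and std::mutex stand-ins rather than the sanitizer's own types, might look like:

#include <atomic>
#include <cstdint>
#include <mutex>

enum ChunkState : uint8_t { CHUNK_ALLOCATED = 2, CHUNK_QUARANTINE = 3 };

// Sketch: move a chunk to quarantine. For ordinary memory a failed CAS means
// an invalid free; for device memory the CAS result is not trusted, so the
// transition is retried under a fallback mutex before reporting a failure.
bool MoveToQuarantine(std::atomic<uint8_t> &State, bool IsDeviceMem,
                      std::mutex &FallbackMutex) {
  uint8_t Expected = CHUNK_ALLOCATED;
  if (State.compare_exchange_strong(Expected, CHUNK_QUARANTINE))
    return true;
  if (!IsDeviceMem)
    return false; // report invalid free, as in the original code
  std::lock_guard<std::mutex> Lock(FallbackMutex);
  if (State.load(std::memory_order_relaxed) != CHUNK_ALLOCATED)
    return false;
  State.store(CHUNK_QUARANTINE, std::memory_order_relaxed);
  return true;
}
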
diff --git a/lld/Common/Args.cpp b/lld/Common/Args.cpp index 388c15b3db3ec..c0545194610a6 100644 --- a/lld/Common/Args.cpp +++ b/lld/Common/Args.cpp @@ -11,8 +11,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/CommandFlags.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; using namespace lld; @@ -20,6 +22,21 @@ using namespace lld; // TODO(sbc): Remove this once CGOptLevel can be set completely based on bitcode // function metadata. CodeGenOpt::Level lld::args::getCGOptLevel(int optLevelLTO) { + // TODO(slinder1): Workaround for HeterogeneousDWARF to support `-fgpu-rdc + // -O0 -g`. Remove this when we support higher optimization levels. + if (llvm::AMDGPU::parseArchAMDGCN(llvm::codegen::getCPUStr())) { + switch (optLevelLTO) { + case 0: + return CodeGenOpt::None; + case 1: + return CodeGenOpt::Less; + case 2: + return CodeGenOpt::Default; + case 3: + return CodeGenOpt::Aggressive; + } + llvm_unreachable("Invalid optimization level"); + } if (optLevelLTO == 3) return CodeGenOpt::Aggressive; assert(optLevelLTO < 3); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 271776ddd32b8..3f775610cd4d0 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -176,12 +176,15 @@ static std::tuple parseEmulation(StringRef emul) { .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) .Case("msp430elf", {ELF32LEKind, EM_MSP430}) + .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) error("unknown emulation: " + emul); if (ret.second == EM_MSP430) osabi = ELFOSABI_STANDALONE; + else if (ret.second == EM_AMDGPU) + osabi = ELFOSABI_AMDGPU_HSA; return std::make_tuple(ret.first, ret.second, osabi); } diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index ab04748b76afa..0ca5bf8461af0 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -177,8 +177,10 @@ static lto::Config createConfig() { }; } - if (config->ltoEmitAsm) + if (config->ltoEmitAsm) { c.CGFileType = CGFT_AssemblyFile; + c.Options.MCOptions.AsmVerbose = true; + } if (!config->saveTempsArgs.empty()) checkError(c.addSaveTemps(config->outputFile.str() + ".", diff --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s new file mode 100644 index 0000000000000..329fb1c69b166 --- /dev/null +++ b/lld/test/ELF/emulation-amdgpu.s @@ -0,0 +1,36 @@ +# REQUIRES: amdgpu + +# RUN: llvm-mc -filetype=obj -triple=amdgcn-amd-amdhsa %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck %s +# RUN: ld.lld -m elf64_amdgpu %t.o -o %t +# RUN: llvm-readobj --file-headers %t | FileCheck %s + +# CHECK: ElfHeader { +# CHECK-NEXT: Ident { +# CHECK-NEXT: Magic: (7F 45 4C 46) +# CHECK-NEXT: Class: 64-bit (0x2) +# CHECK-NEXT: DataEncoding: LittleEndian (0x1) +# CHECK-NEXT: FileVersion: 1 +# CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40) +# CHECK-NEXT: ABIVersion: 3 +# CHECK-NEXT: Unused: (00 00 00 00 00 00 00) +# CHECK-NEXT: } +# CHECK-NEXT: Type: Executable (0x2) +# CHECK-NEXT: Machine: EM_AMDGPU (0xE0) +# CHECK-NEXT: Version: 1 +# CHECK-NEXT: Entry: +# CHECK-NEXT: ProgramHeaderOffset: 0x40 +# CHECK-NEXT: SectionHeaderOffset: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: HeaderSize: 64 +# CHECK-NEXT: ProgramHeaderEntrySize: 56 +# CHECK-NEXT: ProgramHeaderCount: +# CHECK-NEXT: SectionHeaderEntrySize: 64 +# CHECK-NEXT: SectionHeaderCount: +# CHECK-NEXT: 
StringTableSectionIndex: +# CHECK-NEXT: } + +.globl _start +_start: diff --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll index a2f25cdd57d87..a70b678ac2514 100644 --- a/lld/test/ELF/lto/amdgcn-oses.ll +++ b/lld/test/ELF/lto/amdgcn-oses.ll @@ -15,7 +15,7 @@ ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40) -; AMDHSA: ABIVersion: 2 +; AMDHSA: ABIVersion: 3 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41) ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/lld/test/ELF/lto/amdgpu-cg-opt-level.ll b/lld/test/ELF/lto/amdgpu-cg-opt-level.ll new file mode 100644 index 0000000000000..2e21f032c19b2 --- /dev/null +++ b/lld/test/ELF/lto/amdgpu-cg-opt-level.ll @@ -0,0 +1,23 @@ +; REQUIRES: amdgpu + +; TODO(slinder1): Workaround for HeterogeneousDWARF to support `-fgpu-rdc +; -O0 -g`. Remove this when we support higher optimization levels. + +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld -plugin-opt=O0 -plugin-opt=mcpu=gfx90a %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: ld.lld -plugin-opt=O1 -plugin-opt=mcpu=gfx90a %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=CHECK-O1 %s +; RUN: ld.lld -plugin-opt=O2 -plugin-opt=mcpu=gfx90a %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=CHECK-O2 %s +; RUN: ld.lld -plugin-opt=O3 -plugin-opt=mcpu=gfx90a %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=CHECK-O3 %s + +; CHECK-O0: Fast Register Allocator +; CHECK-O1: Greedy Register Allocator +; CHECK-O2: Greedy Register Allocator +; CHECK-O3: Greedy Register Allocator + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/ELF/lto/emit-asm.ll b/lld/test/ELF/lto/emit-asm.ll index d0719411a5bad..3f635b8dbe7f7 100644 --- a/lld/test/ELF/lto/emit-asm.ll +++ b/lld/test/ELF/lto/emit-asm.ll @@ -11,14 +11,18 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +;; Note: we also check for the presence of comments; --lto-emit-asm output should be verbose. + +; CHECK-DAG: # -- Begin function f1 ; CHECK-DAG: f1: -; OPT-DAG: define void @f1() +; OPT: define void @f1() define void @f1() { ret void } +; CHECK-DAG: # -- Begin function f2 ; CHECK-DAG: f2: -; OPT-DAG: define void @f2() +; OPT: define void @f2() define void @f2() { ret void } diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 9034ee4747185..4a4104e786fe4 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1,8 +1,8 @@ # Modifications Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. # Notified per clause 4(b) of the license. -# See docs/CMake.html for instructions about how to build LLVM with CMake. +# See docs/CMake.html for instructions about how to build LLVM with CMake. cmake_minimum_required(VERSION 3.13.4) set(LLVM_COMMON_CMAKE_UTILS ${CMAKE_CURRENT_SOURCE_DIR}/../cmake) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 403a1f80dc54a..5fedc0afeb1b5 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3672,6 +3672,11 @@ Code object V5 metadata is the same as buffer that conforms to the requirements of the malloc/free device library V1 version implementation. 
+ "hidden_heap_v1" + A global address space pointer to an initialized memory + buffer that conforms to the requirements of the malloc/free + device library V1 version implementation. + "hidden_private_base" The high 32 bits of the flat addressing private aperture base. Only used by GFX8 to allow conversion between private segment diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index 119285087957f..9278ccdb47887 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -227,6 +227,32 @@ struct PointerLikeTypeTraits< PtrTraits::NumLowBitsAvailable - IntBits; }; +// Allow structured bindings on PointerIntPair. +template +decltype(auto) +get(const PointerIntPair &Pair) { + static_assert(I < 2); + if constexpr (I == 0) + return Pair.getPointer(); + else + return Pair.getInt(); +} + } // end namespace llvm +namespace std { +template +struct tuple_size< + llvm::PointerIntPair> + : std::integral_constant {}; + +template +struct tuple_element< + I, llvm::PointerIntPair> + : std::conditional {}; +} // namespace std + #endif // LLVM_ADT_POINTERINTPAIR_H diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index bbc5bc8ceeea8..7b1cc24e47bae 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -705,6 +705,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase { void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); } + void assign(size_type NumElts, ValueParamT Elt) { // Note that Elt could be an internal reference. if (NumElts > this->capacity()) { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 3a53017a4e1c9..246b5612c544f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1275,7 +1275,8 @@ class LegalizationArtifactCombiner { // Adding Use to ArtifactList. 
WrapperObserver.changedInstr(Use); break; - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: { Register Copy = Use.getOperand(0).getReg(); if (Copy.isVirtual()) UpdatedDefs.push_back(Copy); @@ -1296,6 +1297,7 @@ class LegalizationArtifactCombiner { static Register getArtifactSrcReg(const MachineInstr &MI) { switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::G_TRUNC: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: @@ -1333,8 +1335,7 @@ class LegalizationArtifactCombiner { MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); if (MRI.hasOneUse(PrevRegSrc)) { if (TmpDef != &DefMI) { - assert((TmpDef->getOpcode() == TargetOpcode::COPY || - isArtifactCast(TmpDef->getOpcode())) && + assert((TmpDef->isCopy() || isArtifactCast(TmpDef->getOpcode())) && "Expecting copy or artifact cast here"); DeadInsts.push_back(TmpDef); @@ -1421,7 +1422,8 @@ class LegalizationArtifactCombiner { using namespace llvm::MIPatternMatch; Register TmpReg; - while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { + while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg))) || + mi_match(Reg, MRI, m_Pred_Copy(m_Reg(TmpReg)))) { if (MRI.getType(TmpReg).isValid()) Reg = TmpReg; else diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 3879e22552ecb..decfda3da7a5f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -629,6 +629,12 @@ inline UnaryOp_match m_Copy(SrcTy &&Src) { return UnaryOp_match(std::forward(Src)); } +template +inline UnaryOp_match m_Pred_Copy(SrcTy &&Src) { + return UnaryOp_match( + std::forward(Src)); +} + template inline UnaryOp_match m_GFSqrt(const SrcTy &Src) { return UnaryOp_match(Src); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index d0918485249dc..6b1a973147558 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -85,6 +85,7 @@ class Pass; class raw_ostream; class TargetPassConfig; class TargetRegisterInfo; +class TargetInstrInfo; /// This pass implements the reg bank selector pass used in the GlobalISel /// pipeline. At the end of this pass, all register operands have been assigned @@ -493,6 +494,9 @@ class RegBankSelect : public MachineFunctionPass { /// Information on the register classes for the current function. const TargetRegisterInfo *TRI = nullptr; + /// Information used to access the description of the opcodes. + const TargetInstrInfo *TII = nullptr; + /// Get the frequency of blocks. /// This is required for non-fast mode. 
MachineBlockFrequencyInfo *MBFI = nullptr; diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 86ac30e181a6d..f0ed566b1dce9 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -134,7 +134,7 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), FirstNew(newRegs.size()), DeadRemats(deadRemats) { - MRI.setDelegate(this); + MRI.addDelegate(this); } ~LiveRangeEdit() override { MRI.resetDelegate(this); } diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index fe4ad270f2a3b..39f0bf4122233 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -775,10 +775,15 @@ class MachineBasicBlock /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of - /// it. This should return null if it can reach the block after - /// it, but it uses an explicit branch to do so (e.g., a table - /// jump). Non-null return is a conservative answer. - MachineBasicBlock *getFallThrough(); + /// it. If an explicit branch to the fallthrough block is not allowed, + /// set JumpToFallThrough to be false. Non-null return is a conservative + /// answer. + MachineBasicBlock *getFallThrough(bool JumpToFallThrough = false); + + /// Return the fallthrough block if the block can implicitly + /// transfer control to it's successor, whether by a branch or + /// a fallthrough. Non-null return is a conservative answer. + MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(true); } /// Return true if the block can implicitly transfer control to the /// block after it by falling off the end of it. This should return diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index cd3aa938ed870..3cb8d7b35e374 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -99,9 +99,10 @@ struct MachineFunctionInfo { /// supplied allocator. /// /// This function can be overridden in a derive class. - template - static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) { - return new (Allocator.Allocate()) Ty(MF); + template + static FuncInfoTy *create(BumpPtrAllocator &Allocator, const Function &F, + const SubtargetTy *STI) { + return new (Allocator.Allocate()) FuncInfoTy(F, STI); } template @@ -280,6 +281,7 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { // Keep track of the function section. MCSection *Section = nullptr; + // Catchpad unwind destination info for wasm EH. // Keeps track of Wasm exception handling related data. This will be null for // functions that aren't using a wasm EH personality. 
WasmEHFuncInfo *WasmEHInfo = nullptr; @@ -752,14 +754,12 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// template Ty *getInfo() { - if (!MFInfo) - MFInfo = Ty::template create(Allocator, *this); return static_cast(MFInfo); } template const Ty *getInfo() const { - return const_cast(this)->getInfo(); + return static_cast(MFInfo); } template Ty *cloneInfo(const Ty &Old) { @@ -768,6 +768,9 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { return static_cast(MFInfo); } + /// Initialize the target specific MachineFunctionInfo + void initTargetMachineFunctionInfo(const TargetSubtargetInfo &STI); + MachineFunctionInfo *cloneInfoFrom( const MachineFunction &OrigMF, const DenseMap &Src2DstMBB) { diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index ec0cc763dbac3..942d52f387489 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1354,7 +1354,8 @@ class MachineInstr } bool isCopy() const { - return getOpcode() == TargetOpcode::COPY; + return getOpcode() == TargetOpcode::COPY || + getOpcode() == TargetOpcode::PRED_COPY; } bool isFullCopy() const { @@ -1388,6 +1389,7 @@ class MachineInstr case TargetOpcode::PHI: case TargetOpcode::G_PHI: case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: case TargetOpcode::REG_SEQUENCE: diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index a51f1c753cd02..572217213920d 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -17,6 +17,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/iterator_range.h" @@ -56,11 +57,15 @@ class MachineRegisterInfo { virtual ~Delegate() = default; virtual void MRI_NoteNewVirtualRegister(Register Reg) = 0; + virtual void MRI_NotecloneVirtualRegister(Register NewReg, + Register SrcReg) { + MRI_NoteNewVirtualRegister(NewReg); + } }; private: MachineFunction *MF; - Delegate *TheDelegate = nullptr; + SmallPtrSet TheDelegates; /// True if subregister liveness is tracked. const bool TracksSubRegLiveness; @@ -154,19 +159,28 @@ class MachineRegisterInfo { void resetDelegate(Delegate *delegate) { // Ensure another delegate does not take over unless the current - // delegate first unattaches itself. If we ever need to multicast - // notifications, we will need to change to using a list. - assert(TheDelegate == delegate && - "Only the current delegate can perform reset!"); - TheDelegate = nullptr; + // delegate first unattaches itself. 
+ assert(TheDelegates.count(delegate) && + "Only an existing delegate can perform reset!"); + TheDelegates.erase(delegate); } - void setDelegate(Delegate *delegate) { - assert(delegate && !TheDelegate && - "Attempted to set delegate to null, or to change it without " + void addDelegate(Delegate *delegate) { + assert(delegate && !TheDelegates.count(delegate) && + "Attempted to add null delegate, or to change it without " "first resetting it!"); - TheDelegate = delegate; + TheDelegates.insert(delegate); + } + + void noteNewVirtualRegister(Register Reg) { + for (auto *TheDelegate : TheDelegates) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); + } + + void noteCloneVirtualRegister(Register NewReg, Register SrcReg) { + for (auto *TheDelegate : TheDelegates) + TheDelegate->MRI_NotecloneVirtualRegister(NewReg, SrcReg); } //===--------------------------------------------------------------------===// @@ -900,6 +914,18 @@ class MachineRegisterInfo { /// of reserved registers before allocation begins. void freezeReservedRegs(const MachineFunction&); + /// reserveReg -- Mark a register as reserved so checks like isAllocatable + /// will not suggest using it. This should not be used during the middle + /// of a function walk, or when liveness info is available. + void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI) { + assert(reservedRegsFrozen() && + "Reserved registers haven't been frozen yet. "); + MCRegAliasIterator R(PhysReg, TRI, true); + + for (; R.isValid(); ++R) + ReservedRegs.set(*R); + } + /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called /// to ensure the set of reserved registers stays constant. bool reservedRegsFrozen() const { diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 40663f95fa0a8..665222efaeacd 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -98,10 +98,11 @@ struct ExtAddrMode { class TargetInstrInfo : public MCInstrInfo { public: TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, - unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) + unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u, + unsigned CopyOpcode = TargetOpcode::COPY) : CallFrameSetupOpcode(CFSetupOpcode), CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode), - ReturnOpcode(ReturnOpcode) {} + ReturnOpcode(ReturnOpcode), CopyOpcode(CopyOpcode) {} TargetInstrInfo(const TargetInstrInfo &) = delete; TargetInstrInfo &operator=(const TargetInstrInfo &) = delete; virtual ~TargetInstrInfo(); @@ -240,6 +241,7 @@ class TargetInstrInfo : public MCInstrInfo { unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } unsigned getReturnOpcode() const { return ReturnOpcode; } + unsigned getCopyOpcode() const { return CopyOpcode; } /// Returns the actual stack pointer adjustment made by an instruction /// as part of a call sequence. By default, only call frame setup/destroy @@ -1060,24 +1062,36 @@ class TargetInstrInfo : public MCInstrInfo { /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill - /// is true, the register operand is the last use and must be marked kill. + /// is true, the register operand is the last use and must be marked kill. 
If + /// \p SrcReg is being directly spilled as part of assigning a virtual + /// register, \p VReg is the register being assigned. This additional register + /// argument is needed for certain targets when invoked from RegAllocFast to + /// map the spilled physical register to its virtual register. A null register + /// can be passed elsewhere. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::storeRegToStackSlot!"); } /// Load the specified register of the given register class from the specified /// stack frame index. The load instruction is to be added to the given - /// machine basic block before the specified machine instruction. + /// machine basic block before the specified machine instruction. If \p + /// DestReg is being directly reloaded as part of assigning a virtual + /// register, \p VReg is the register being assigned. This additional register + /// argument is needed for certain targets when invoked from RegAllocFast to + /// map the loaded physical register to its virtual register. A null register + /// can be passed elsewhere. virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::loadRegFromStackSlot!"); } @@ -1899,14 +1913,48 @@ class TargetInstrInfo : public MCInstrInfo { return false; } + /// Helper function for inserting a COPY to \p Dst at insertion point \p InsPt + /// in \p MBB block. + MachineInstr *buildCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, const DebugLoc &DL, + Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(getCopyOpcode()), Dst); + } + + /// Helper function for inserting a COPY to \p Dst from \p Src at insertion + /// point \p InsPt in \p MBB block. + MachineInstr *buildCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, const DebugLoc &DL, + Register Dst, Register Src, unsigned Flags = 0, + unsigned SubReg = 0) const { + return BuildMI(MBB, InsPt, DL, get(getCopyOpcode()), Dst) + .addReg(Src, Flags, SubReg); + } + + /// Helper function for inserting a COPY to \p Dst from \p Src at insertion + /// point \p InsPt in \p MBB block. Get the Debug Location from \p MIMD. + MachineInstrBuilder buildCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const MIMetadata &MIMD, Register Dst, + Register Src, unsigned Flags = 0, + unsigned SubReg = 0) const { + MachineFunction &MF = *MBB.getParent(); + MachineInstr *MI = + MF.CreateMachineInstr(get(getCopyOpcode()), MIMD.getDL()); + MBB.insert(InsPt, MI); + return MachineInstrBuilder(MF, MI) + .setPCSections(MIMD.getPCSections()) + .addReg(Dst, RegState::Define) + .addReg(Src, Flags, SubReg); + } + /// During PHI eleimination lets target to make necessary checks and /// insert the copy to the PHI destination register in a target specific /// manner. 
virtual MachineInstr *createPHIDestinationCopy( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const { - return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) - .addReg(Src); + return buildCopy(MBB, InsPt, DL, Dst, Src); } /// During PHI eleimination lets target to make necessary checks and @@ -1917,8 +1965,7 @@ class TargetInstrInfo : public MCInstrInfo { const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const { - return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) - .addReg(Src, 0, SrcSubReg); + return buildCopy(MBB, InsPt, DL, Dst, Src, 0, SrcSubReg); } /// Returns a \p outliner::OutlinedFunction struct containing target-specific @@ -2019,6 +2066,7 @@ class TargetInstrInfo : public MCInstrInfo { unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; unsigned ReturnOpcode; + unsigned CopyOpcode; }; /// Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a76fb97a14dc5..41f90d5152235 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1049,6 +1049,10 @@ class TargetLoweringBase { // value representing memory location PointerUnion ptrVal; + // Fallback address space for use if ptrVal is nullptr. None means unknown + // address space. + Optional fallbackAddressSpace; + int offset = 0; // offset off of ptrVal uint64_t size = 0; // the size of the memory location // (taken from memVT if zero) @@ -4050,23 +4054,6 @@ class TargetLowering : public TargetLoweringBase { return false; } - /// Allows the target to handle physreg-carried dependency - /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether - /// to add the edge to the dependency graph. - /// Def - input: Selection DAG node defininfg physical register - /// User - input: Selection DAG node using physical register - /// Op - input: Number of User operand - /// PhysReg - inout: set to the physical register if the edge is - /// necessary, unchanged otherwise - /// Cost - inout: physical register copy cost. - /// Returns 'true' is the edge is necessary, 'false' otherwise - virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, - unsigned &PhysReg, int &Cost) const { - return false; - } - /// Target-specific combining of register parts into its original value virtual SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index d55f88dd50e57..2e8527b00dcc2 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -905,6 +905,10 @@ class TargetRegisterInfo : public MCRegisterInfo { /// (3) Bottom-up allocation is no longer guaranteed to optimally color. virtual bool reverseLocalAssignment() const { return false; } + /// Add the allocation priority to global and split ranges as well as the + /// local ranges when registers are added to the queue. + virtual bool addAllocPriorityToGlobalRanges() const { return false; } + /// Allow the target to override the cost of using a callee-saved register for /// the first time. Default value of 0 means we will use a callee-saved /// register if it is available. 
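The TargetInstrInfo changes above route copy creation through a target-configurable opcode instead of hard-coding TargetOpcode::COPY. A hedged sketch of how a caller and a target would use the new hooks follows; emitPlainCopy and MyTargetInstrInfo are illustrative names, not part of the patch:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// A pass no longer spells out TargetOpcode::COPY; the target decides whether
// the emitted instruction is COPY or PRED_COPY via getCopyOpcode().
static MachineInstr *emitPlainCopy(const TargetInstrInfo &TII,
                                   MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator InsPt,
                                   const DebugLoc &DL, Register Dst,
                                   Register Src) {
  // Equivalent to the old
  //   BuildMI(MBB, InsPt, DL, TII.get(TargetOpcode::COPY), Dst).addReg(Src)
  // but opcode-agnostic.
  return TII.buildCopy(MBB, InsPt, DL, Dst, Src);
}

// A target that wants predicated copies opts in through the new constructor
// parameter (hypothetical target class shown only for illustration).
struct MyTargetInstrInfo : public TargetInstrInfo {
  MyTargetInstrInfo()
      : TargetInstrInfo(/*CFSetupOpcode=*/~0u, /*CFDestroyOpcode=*/~0u,
                        /*CatchRetOpcode=*/~0u, /*ReturnOpcode=*/~0u,
                        /*CopyOpcode=*/TargetOpcode::PRED_COPY) {}
};

Targets that keep the default CopyOpcode continue to emit TargetOpcode::COPY; the visible behavioral change for them is that MachineInstr::isCopy() now matches PRED_COPY as well, as shown in the MachineInstr.h hunk above.
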
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index d37d5c053ca4f..9a6167d0f5887 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -193,6 +193,7 @@ enum OMPTgtExecModeFlags : int8_t { OMP_TGT_EXEC_MODE_GENERIC_SPMD = OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD, OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2, + OMP_TGT_EXEC_MODE_SPMD_BIG_JUMP_LOOP = OMP_TGT_EXEC_MODE_SPMD_NO_LOOP | 1, OMP_TGT_EXEC_MODE_XTEAM_RED = 1 << 3, LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_XTEAM_RED) }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index a9fdf97782e8e..6da08a06b46c0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -531,6 +531,38 @@ __OMP_RTL(__kmpc_xteamr_f_16x64, false, Void, Float, FloatPtr, FloatPtr, Int32Pt __OMP_RTL(__kmpc_xteamr_d_16x64, false, Void, Double, DoublePtr, DoublePtr, Int32Ptr, VoidPtr, VoidPtr, Double, Int64, Int32) +__OMP_RTL(__kmpc_xteamr_ui_8x64, false, Void, Int32, Int32Ptr, Int32Ptr, Int32Ptr, VoidPtr, VoidPtr, Int32, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ul_8x64, false, Void, Int64, Int64Ptr, Int64Ptr, Int32Ptr, VoidPtr, VoidPtr, Int64, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_f_8x64, false, Void, Float, FloatPtr, FloatPtr, Int32Ptr, VoidPtr, VoidPtr, Float, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_d_8x64, false, Void, Double, DoublePtr, DoublePtr, Int32Ptr, VoidPtr, VoidPtr, Double, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ui_4x64, false, Void, Int32, Int32Ptr, Int32Ptr, Int32Ptr, VoidPtr, VoidPtr, Int32, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ul_4x64, false, Void, Int64, Int64Ptr, Int64Ptr, Int32Ptr, VoidPtr, VoidPtr, Int64, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_f_4x64, false, Void, Float, FloatPtr, FloatPtr, Int32Ptr, VoidPtr, VoidPtr, Float, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_d_4x64, false, Void, Double, DoublePtr, DoublePtr, Int32Ptr, VoidPtr, VoidPtr, Double, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ui_2x64, false, Void, Int32, Int32Ptr, Int32Ptr, Int32Ptr, VoidPtr, VoidPtr, Int32, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ul_2x64, false, Void, Int64, Int64Ptr, Int64Ptr, Int32Ptr, VoidPtr, VoidPtr, Int64, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_f_2x64, false, Void, Float, FloatPtr, FloatPtr, Int32Ptr, VoidPtr, VoidPtr, Float, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_d_2x64, false, Void, Double, DoublePtr, DoublePtr, Int32Ptr, VoidPtr, VoidPtr, Double, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ui_1x64, false, Void, Int32, Int32Ptr, Int32Ptr, Int32Ptr, VoidPtr, VoidPtr, Int32, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_ul_1x64, false, Void, Int64, Int64Ptr, Int64Ptr, Int32Ptr, VoidPtr, VoidPtr, Int64, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_f_1x64, false, Void, Float, FloatPtr, FloatPtr, Int32Ptr, VoidPtr, VoidPtr, Float, Int64, Int32) + +__OMP_RTL(__kmpc_xteamr_d_1x64, false, Void, Double, DoublePtr, DoublePtr, Int32Ptr, VoidPtr, VoidPtr, Double, Int64, Int32) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 24da08d70b726..9b210c9317035 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -516,6 +516,8 @@ class LLVM_EXTERNAL_VISIBILITY Module { void addModuleFlag(MDNode *Node); /// Like addModuleFlag but replaces the old module flag if it already exists. 
void setModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val); + void setModuleFlag(ModFlagBehavior Behavior, StringRef Key, Constant *Val); + void setModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val); /// @} /// @name Materialization diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h index 0f33d3b6a2398..d5d3db0891e3f 100644 --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -230,6 +230,10 @@ class MCSubtargetInfo { return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + ArrayRef getAllProcessorDescriptions() const { + return ProcDesc; + } + virtual unsigned getHwMode() const { return 0; } /// Return the cache size in bytes for the given level of cache. diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 5fb7400b7d024..15f968bab91c3 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -117,6 +117,10 @@ HANDLE_TARGET_OPCODE(DBG_KILL) /// used to copy between subregisters of virtual registers. HANDLE_TARGET_OPCODE(COPY) +/// PRED_COPY - Target-independent register copy with a predication. +/// Some targets require it for special handling certain register copies. + HANDLE_TARGET_OPCODE(PRED_COPY) + /// BUNDLE - This instruction represents an instruction bundle. Instructions /// which immediately follow a BUNDLE instruction which are marked with /// 'InsideBundle' flag are inside the bundle. diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9f29e9faf385b..08f6418f6810b 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -126,8 +126,8 @@ GIDefMatchData<"std::function">; def unsigned_matchinfo: GIDefMatchData<"unsigned">; def copy_prop : GICombineRule< - (defs root:$d), - (match (COPY $d, $s):$mi, + (defs root:$mi), + (match (wip_match_opcode COPY, PRED_COPY):$mi, [{ return Helper.matchCombineCopy(*${mi}); }]), (apply [{ Helper.applyCombineCopy(*${mi}); }])>; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index a425b0d2bbea1..0f9d0f7b05159 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1265,6 +1265,15 @@ def COPY : StandardPseudoInstruction { let isAsCheapAsAMove = true; let hasNoSchedulingInfo = false; } +def PRED_COPY : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = "PRED_COPY"; + let hasSideEffects = false; + let isAsCheapAsAMove = true; + let hasNoSchedulingInfo = false; + let isPredicable = true; +} def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index d45be68e8a235..55dadffa88ad2 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" #include "llvm/Support/PGOOptions.h" @@ -66,6 +67,7 @@ class PassManagerBase; } using legacy::PassManagerBase; +struct MachineFunctionInfo; namespace yaml { struct MachineFunctionInfo; } @@ -139,6 +141,13 @@ class 
TargetMachine { return nullptr; } + /// Create the target's instance of MachineFunctionInfo + virtual MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return nullptr; + } + /// Allocate and return a default initialized instance of the YAML /// representation for the MachineFunctionInfo. virtual yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const { @@ -507,6 +516,9 @@ class LLVMTargetMachine : public TargetMachine { /// The default variant to use in unqualified `asm` instructions. /// If this returns 0, `asm "$(foo$|bar$)"` will evaluate to `asm "foo"`. virtual int unqualifiedInlineAsmVariant() const { return 0; } + + // MachineRegisterInfo callback function + virtual void registerMachineRegisterInfoCallback(MachineFunction &MF) const {} }; /// Helper method for getting the code model, returning Default if diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 61c26dfabed0b..5d1ecdc147f15 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -211,43 +211,65 @@ combineOptionalValuesInAAValueLatice(const Optional &A, /// Helper to represent an access offset and size, with logic to deal with /// uncertainty and check for overlapping accesses. -struct OffsetAndSize { +struct RangeTy { int64_t Offset = Unassigned; int64_t Size = Unassigned; - OffsetAndSize(int64_t Offset, int64_t Size) : Offset(Offset), Size(Size) {} - OffsetAndSize() = default; - static OffsetAndSize getUnknown() { return OffsetAndSize{Unknown, Unknown}; } + RangeTy(int64_t Offset, int64_t Size) : Offset(Offset), Size(Size) {} + RangeTy() = default; + static RangeTy getUnknown() { return RangeTy{Unknown, Unknown}; } /// Return true if offset or size are unknown. bool offsetOrSizeAreUnknown() const { - return Offset == OffsetAndSize::Unknown || Size == OffsetAndSize::Unknown; + return Offset == RangeTy::Unknown || Size == RangeTy::Unknown; } /// Return true if offset and size are unknown, thus this is the default /// unknown object. bool offsetAndSizeAreUnknown() const { - return Offset == OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown; + return Offset == RangeTy::Unknown && Size == RangeTy::Unknown; } /// Return true if the offset and size are unassigned. bool isUnassigned() const { - assert((Offset == OffsetAndSize::Unassigned) == - (Size == OffsetAndSize::Unassigned) && + assert((Offset == RangeTy::Unassigned) == (Size == RangeTy::Unassigned) && "Inconsistent state!"); - return Offset == OffsetAndSize::Unassigned; + return Offset == RangeTy::Unassigned; } /// Return true if this offset and size pair might describe an address that - /// overlaps with \p OAS. - bool mayOverlap(const OffsetAndSize &OAS) const { + /// overlaps with \p Range. + bool mayOverlap(const RangeTy &Range) const { // Any unknown value and we are giving up -> overlap. - if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown()) + if (offsetOrSizeAreUnknown() || Range.offsetOrSizeAreUnknown()) return true; // Check if one offset point is in the other interval [offset, // offset+size]. 
- return OAS.Offset + OAS.Size > Offset && OAS.Offset < Offset + Size; + return Range.Offset + Range.Size > Offset && Range.Offset < Offset + Size; + } + + RangeTy &operator&=(const RangeTy &R) { + if (Offset == Unassigned) + Offset = R.Offset; + else if (R.Offset != Unassigned && R.Offset != Offset) + Offset = Unknown; + + if (Size == Unassigned) + Size = R.Size; + else if (Size == Unknown || R.Size == Unknown) + Size = Unknown; + else if (R.Size != Unassigned) + Size = std::max(Size, R.Size); + + return *this; + } + + /// Comparison for sorting ranges by offset. + /// + /// Returns true if the offset \p L is less than that of \p R. + inline static bool OffsetLessThan(const RangeTy &L, const RangeTy &R) { + return L.Offset < R.Offset; } /// Constants used to represent special offsets or sizes. @@ -258,19 +280,22 @@ struct OffsetAndSize { static constexpr int64_t Unknown = -2; }; -inline bool operator==(const OffsetAndSize &A, const OffsetAndSize &B) { - return A.Offset == B.Offset && A.Size == B.Size; +inline raw_ostream &operator<<(raw_ostream &OS, const RangeTy &R) { + OS << "[" << R.Offset << ", " << R.Size << "]"; + return OS; } -inline bool operator!=(const OffsetAndSize &A, const OffsetAndSize &B) { - return !(A == B); +inline bool operator==(const RangeTy &A, const RangeTy &B) { + return A.Offset == B.Offset && A.Size == B.Size; } +inline bool operator!=(const RangeTy &A, const RangeTy &B) { return !(A == B); } + /// Return the initial value of \p Obj with type \p Ty if that is a constant. Constant *getInitialValueForObj(Value &Obj, Type &Ty, const TargetLibraryInfo *TLI, const DataLayout &DL, - OffsetAndSize *OASPtr = nullptr); + RangeTy *RangePtr = nullptr); /// Collect all potential underlying objects of \p Ptr at position \p CtxI in /// \p Objects. Assumed information is used and dependences onto \p QueryingAA @@ -1764,7 +1789,10 @@ struct Attributor { /// Try to simplify \p IRP and in the scope \p S. If successful, true is /// returned and all potential values \p IRP can take are put into \p Values. - /// If false is returned no other information is valid. + /// If the result in \p Values contains select or PHI instructions it means + /// those could not be simplified to a single value. Recursive calls with + /// these instructions will yield their respective potential values. If false + /// is returned no other information is valid. bool getAssumedSimplifiedValues(const IRPosition &IRP, const AbstractAttribute *AA, SmallVectorImpl &Values, @@ -4972,7 +5000,7 @@ struct AAPointerInfo : public AbstractAttribute { AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} enum AccessKind { - // First two bits to distinguish may and must accesses + // First two bits to distinguish may and must accesses. AK_MUST = 1 << 0, AK_MAY = 1 << 1, @@ -4981,6 +5009,11 @@ struct AAPointerInfo : public AbstractAttribute { AK_W = 1 << 3, AK_RW = AK_R | AK_W, + // One special case for assumptions about memory content. These + // are neither reads nor writes. They are however always modeled + // as read to avoid using them for write removal. + AK_ASSUMPTION = (1 << 4) | AK_MUST, + // Helper for easy access. AK_MAY_READ = AK_MAY | AK_R, AK_MAY_WRITE = AK_MAY | AK_W, @@ -4990,41 +5023,221 @@ struct AAPointerInfo : public AbstractAttribute { AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W, }; + /// A container for a list of ranges. + struct RangeList { + // The set of ranges rarely contains more than one element, and is unlikely + // to contain more than say four elements. 
So we find the middle-ground with + // a sorted vector. This avoids hard-coding a rarely used number like "four" + // into every instance of a SmallSet. + using RangeTy = AA::RangeTy; + using VecTy = SmallVector; + using iterator = VecTy::iterator; + using const_iterator = VecTy::const_iterator; + VecTy Ranges; + + RangeList(const RangeTy &R) { Ranges.push_back(R); } + RangeList(ArrayRef Offsets, int64_t Size) { + Ranges.reserve(Offsets.size()); + for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { + assert(((i + 1 == e) || Offsets[i] < Offsets[i + 1]) && + "Expected strictly ascending offsets."); + Ranges.emplace_back(Offsets[i], Size); + } + } + RangeList() = default; + + iterator begin() { return Ranges.begin(); } + iterator end() { return Ranges.end(); } + const_iterator begin() const { return Ranges.begin(); } + const_iterator end() const { return Ranges.end(); } + + // Helpers required for std::set_difference + using value_type = RangeTy; + void push_back(const RangeTy &R) { + assert((Ranges.empty() || RangeTy::OffsetLessThan(Ranges.back(), R)) && + "Ensure the last element is the greatest."); + Ranges.push_back(R); + } + + /// Copy ranges from \p L that are not in \p R, into \p D. + static void set_difference(const RangeList &L, const RangeList &R, + RangeList &D) { + std::set_difference(L.begin(), L.end(), R.begin(), R.end(), + std::back_inserter(D), RangeTy::OffsetLessThan); + } + + unsigned size() const { return Ranges.size(); } + + bool operator==(const RangeList &OI) const { return Ranges == OI.Ranges; } + + /// Merge the ranges in \p RHS into the current ranges. + /// - Merging a list of unknown ranges makes the current list unknown. + /// - Ranges with the same offset are merged according to RangeTy::operator& + /// \return true if the current RangeList changed. + bool merge(const RangeList &RHS) { + if (isUnknown()) + return false; + if (RHS.isUnknown()) { + setUnknown(); + return true; + } + + if (Ranges.empty()) { + Ranges = RHS.Ranges; + return true; + } + + bool Changed = false; + auto LPos = Ranges.begin(); + for (auto &R : RHS.Ranges) { + auto Result = insert(LPos, R); + if (isUnknown()) + return true; + LPos = Result.first; + Changed |= Result.second; + } + return Changed; + } + + /// Insert \p R at the given iterator \p Pos, and merge if necessary. + /// + /// This assumes that all ranges before \p Pos are OffsetLessThan \p R, and + /// then maintains the sorted order for the suffix list. + /// + /// \return The place of insertion and true iff anything changed. + std::pair insert(iterator Pos, const RangeTy &R) { + if (isUnknown()) + return std::make_pair(Ranges.begin(), false); + if (R.offsetOrSizeAreUnknown()) { + return std::make_pair(setUnknown(), true); + } + + // Maintain this as a sorted vector of unique entries. + auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::OffsetLessThan); + if (LB == Ranges.end() || LB->Offset != R.Offset) + return std::make_pair(Ranges.insert(LB, R), true); + bool Changed = *LB != R; + *LB &= R; + if (LB->offsetOrSizeAreUnknown()) + return std::make_pair(setUnknown(), true); + return std::make_pair(LB, Changed); + } + + /// Insert the given range \p R, maintaining sorted order. + /// + /// \return The place of insertion and true iff anything changed. + std::pair insert(const RangeTy &R) { + return insert(Ranges.begin(), R); + } + + /// Add the increment \p Inc to the offset of every range. 
+ void addToAllOffsets(int64_t Inc) { + assert(!isUnassigned() && + "Cannot increment if the offset is not yet computed!"); + if (isUnknown()) + return; + for (auto &R : Ranges) { + R.Offset += Inc; + } + } + + /// Return true iff there is exactly one range and it is known. + bool isUnique() const { + return Ranges.size() == 1 && !Ranges.front().offsetOrSizeAreUnknown(); + } + + /// Return the unique range, assuming it exists. + const RangeTy &getUnique() const { + assert(isUnique() && "No unique range to return!"); + return Ranges.front(); + } + + /// Return true iff the list contains an unknown range. + bool isUnknown() const { + if (isUnassigned()) + return false; + if (Ranges.front().offsetOrSizeAreUnknown()) { + assert(Ranges.size() == 1 && "Unknown is a singleton range."); + return true; + } + return false; + } + + /// Discard all ranges and insert a single unknown range. + iterator setUnknown() { + Ranges.clear(); + Ranges.push_back(RangeTy::getUnknown()); + return Ranges.begin(); + } + + /// Return true if no ranges have been inserted. + bool isUnassigned() const { return Ranges.size() == 0; } + }; + /// An access description. struct Access { - Access(Instruction *I, Optional Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { + Access(Instruction *I, int64_t Offset, int64_t Size, + Optional Content, AccessKind Kind, Type *Ty) + : LocalI(I), RemoteI(I), Content(Content), Ranges(Offset, Size), + Kind(Kind), Ty(Ty) { verify(); } - Access(Instruction *LocalI, Instruction *RemoteI, Optional Content, - AccessKind Kind, Type *Ty) - : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), - Ty(Ty) { + Access(Instruction *LocalI, Instruction *RemoteI, const RangeList &Ranges, + Optional Content, AccessKind K, Type *Ty) + : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Ranges(Ranges), + Kind(K), Ty(Ty) { + if (Ranges.size() > 1) { + Kind = AccessKind(Kind | AK_MAY); + Kind = AccessKind(Kind & ~AK_MUST); + } + verify(); + } + Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset, + int64_t Size, Optional Content, AccessKind Kind, Type *Ty) + : LocalI(LocalI), RemoteI(RemoteI), Content(Content), + Ranges(Offset, Size), Kind(Kind), Ty(Ty) { verify(); } Access(const Access &Other) = default; - Access(const Access &&Other) - : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), - Kind(Other.Kind), Ty(Other.Ty) {} Access &operator=(const Access &Other) = default; bool operator==(const Access &R) const { - return LocalI == R.LocalI && RemoteI == R.RemoteI && + return LocalI == R.LocalI && RemoteI == R.RemoteI && Ranges == R.Ranges && Content == R.Content && Kind == R.Kind; } bool operator!=(const Access &R) const { return !(*this == R); } Access &operator&=(const Access &R) { assert(RemoteI == R.RemoteI && "Expected same instruction!"); + assert(LocalI == R.LocalI && "Expected same instruction!"); + + // Note that every Access object corresponds to a unique Value, and only + // accesses to the same Value are merged. Hence we assume that all ranges + // are the same size. If ranges can be different size, then the contents + // must be dropped. + Ranges.merge(R.Ranges); Content = AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); + + // Combine the access kind, which results in a bitwise union. + // If there is more than one range, then this must be a MAY. + // If we combine a may and a must access we clear the must bit. 
Kind = AccessKind(Kind | R.Kind); + if ((Kind & AK_MAY) || Ranges.size() > 1) { + Kind = AccessKind(Kind | AK_MAY); + Kind = AccessKind(Kind & ~AK_MUST); + } + verify(); return *this; } void verify() { assert(isMustAccess() + isMayAccess() == 1 && "Expect must or may access, not both."); + assert(isAssumption() + isWrite() <= 1 && + "Expect assumption access or write access, never both."); + assert((isMayAccess() || Ranges.size() == 1) && + "Cannot be a must access if there are multiple ranges."); } /// Return the access kind. @@ -5036,8 +5249,25 @@ struct AAPointerInfo : public AbstractAttribute { /// Return true if this is a write access. bool isWrite() const { return Kind & AK_W; } - bool isMustAccess() const { return Kind & AK_MUST; } - bool isMayAccess() const { return Kind & AK_MAY; } + /// Return true if this is a write access. + bool isWriteOrAssumption() const { return isWrite() | isAssumption(); } + + /// Return true if this is an assumption access. + bool isAssumption() const { return Kind == AK_ASSUMPTION; } + + bool isMustAccess() const { + bool MustAccess = Kind & AK_MUST; + assert((!MustAccess || Ranges.size() < 2) && + "Cannot be a must access if there are multiple ranges."); + return MustAccess; + } + + bool isMayAccess() const { + bool MayAccess = Kind & AK_MAY; + assert((MayAccess || Ranges.size() < 2) && + "Cannot be a must access if there are multiple ranges."); + return MayAccess; + } /// Return the instruction that causes the access with respect to the local /// scope of the associated attribute. @@ -5054,18 +5284,43 @@ struct AAPointerInfo : public AbstractAttribute { return Content.has_value() && !*Content; } + /// Set the value written to nullptr, i.e., unknown. + void setWrittenValueUnknown() { Content = nullptr; } + /// Return the type associated with the access, if known. Type *getType() const { return Ty; } - /// Return the value writen, if any. As long as - /// isWrittenValueYetUndetermined return true this function shall not be - /// called. - Value *getWrittenValue() const { return *Content; } + /// Return the value writen, if any. + Value *getWrittenValue() const { + assert(!isWrittenValueYetUndetermined() && + "Value needs to be determined before accessing it."); + return *Content; + } /// Return the written value which can be `llvm::null` if it is not yet /// determined. Optional getContent() const { return Content; } + bool hasUniqueRange() const { return Ranges.isUnique(); } + const AA::RangeTy &getUniqueRange() const { return Ranges.getUnique(); } + + /// Add a range accessed by this Access. + /// + /// If there are multiple ranges, then this is a "may access". + void addRange(int64_t Offset, int64_t Size) { + Ranges.insert({Offset, Size}); + if (!hasUniqueRange()) { + Kind = AccessKind(Kind | AK_MAY); + Kind = AccessKind(Kind & ~AK_MUST); + } + } + + const RangeList &getRanges() const { return Ranges; } + + using const_iterator = RangeList::const_iterator; + const_iterator begin() const { return Ranges.begin(); } + const_iterator end() const { return Ranges.end(); } + private: /// The instruction responsible for the access with respect to the local /// scope of the associated attribute. @@ -5078,6 +5333,9 @@ struct AAPointerInfo : public AbstractAttribute { /// cannot be determined. Optional Content; + /// Set of potential ranges accessed from the base pointer. + RangeList Ranges; + /// The access kind, e.g., READ, as bitset (could be more than one). 
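The asserts above pin down the invariant that a must-access describes exactly one known range. A hypothetical sequence, written as if inside AAPointerInfo's scope (illustrative only; SI stands for some StoreInst, and the AK_* enumerators and Access constructor are the ones from this hunk):

  Access Acc(&SI, /*Offset=*/0, /*Size=*/4,
             /*Content=*/SI.getValueOperand(),
             AccessKind(AK_MUST | AK_W),
             SI.getValueOperand()->getType());
  assert(Acc.isMustAccess() && Acc.hasUniqueRange());

  Acc.addRange(16, 4);           // a second potential range...
  assert(Acc.isMayAccess());     // ...demotes the access to a may-access
  assert(!Acc.isMustAccess() && !Acc.hasUniqueRange());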
AccessKind Kind; @@ -5095,13 +5353,12 @@ struct AAPointerInfo : public AbstractAttribute { /// See AbstractAttribute::getIdAddr() const char *getIdAddr() const override { return &ID; } - /// Call \p CB on all accesses that might interfere with \p OAS and return + /// Call \p CB on all accesses that might interfere with \p Range and return /// true if all such accesses were known and the callback returned true for /// all of them, false otherwise. An access interferes with an offset-size /// pair if it might read or write that memory region. virtual bool forallInterferingAccesses( - AA::OffsetAndSize OAS, - function_ref CB) const = 0; + AA::RangeTy Range, function_ref CB) const = 0; /// Call \p CB on all accesses that might interfere with \p I and /// return true if all such accesses were known and the callback returned true @@ -5113,7 +5370,7 @@ struct AAPointerInfo : public AbstractAttribute { virtual bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref CB, bool &HasBeenWrittenTo, - AA::OffsetAndSize *OASPtr = nullptr) const = 0; + AA::RangeTy &Range) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d4f32e52cdb8d..f79b0b268bdf3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -938,7 +938,10 @@ void DwarfExprAST::lowerDIOpReferrer(DwarfExprAST::Node *OpNode) { if (Referrer->isReg() && Referrer->getReg()) { auto DWARFRegister = TRI->getDwarfRegNum(Referrer->getReg(), false); - assert(DWARFRegister != -1 && "No DWARF register for referrer"); + if (DWARFRegister == -1) { + IsImplemented = false; + return; + } emitReg(DWARFRegister); } else if (Referrer->isImm()) { auto I = Referrer->getImm(); diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 87d5d053318fc..8767377934656 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -432,7 +432,6 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *MBB = MI.getParent(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; unsigned OldBrSize = TII->getInstSizeInBytes(MI); MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); @@ -446,20 +445,6 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *BranchBB = MBB; - auto RemoveBranch = [&](MachineBasicBlock *MBB) { - unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; - int RemovedSize = 0; - TII->removeBranch(*MBB, &RemovedSize); - BBSize -= RemovedSize; - }; - - auto InsertUncondBranch = [&](MachineBasicBlock *MBB, - MachineBasicBlock *Dst) { - TII->insertUnconditionalBranch(*MBB, Dst, DebugLoc()); - // Recalculate the block size. - BlockInfo[MBB->getNumber()].Size = computeBlockSize(*MBB); - }; - // If this was an expanded conditional branch, there is already a single // unconditional branch in a block. if (!MBB->empty()) { @@ -500,13 +485,10 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); // Fall through only if PrevBB has no unconditional branch as one of its // terminators. 
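With the switch from AA::OffsetAndSize to AA::RangeTy in the AAPointerInfo interface above, a caller of the first overload might now look like the following (hypothetical; the callback parameter list (const Access &, bool) is assumed from existing users since the template arguments are not shown here, and PI stands for some const AAPointerInfo & obtained from the Attributor):

  AA::RangeTy Range(/*Offset=*/0, /*Size=*/8);
  bool AllKnown = PI.forallInterferingAccesses(
      Range, [&](const AAPointerInfo::Access &Acc, bool IsExact) {
        // Give up as soon as something may write into the queried range.
        return !Acc.isWriteOrAssumption();
      });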
- if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) - report_fatal_error("Could not analyze terminators."); - if (!FBB) { - if (!Cond.empty() && TBB && TBB == DestBB) - RemoveBranch(PrevBB); - if (!TBB || (TBB && !Cond.empty())) - InsertUncondBranch(PrevBB, DestBB); + if (auto *FT = PrevBB->getLogicalFallThrough()) { + assert(FT == DestBB); + TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); + BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); } // Now, RestoreBB could be placed directly before DestBB. MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 44cdd8275beda..b9ca26aabdc72 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -203,7 +203,7 @@ unsigned llvm::getInvertedFPClassTest(unsigned Test) { static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI, MachineInstr &Copy) { - assert(Copy.getOpcode() == TargetOpcode::COPY && "Must be a COPY"); + assert(Copy.isCopy() && "Must be a COPY"); return &Copy.getOperand(1); } @@ -234,6 +234,7 @@ static MachineOperand *salvageDebugInfoImpl(const MachineRegisterInfo &MRI, case TargetOpcode::G_TRUNC: return getSalvageOpsForTrunc(MRI, MI, Ops); case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: return getSalvageOpsForCopy(MRI, MI); default: return nullptr; diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp index 565c8b405f828..87cb3c8cca2d9 100644 --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -134,6 +134,7 @@ static bool lowersToCopies(const MachineInstr &MI) { // are not lowered to a COPY. switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::PHI: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::REG_SEQUENCE: @@ -229,6 +230,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, switch (MI.getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::PHI: return UsedLanes; case TargetOpcode::REG_SEQUENCE: { @@ -331,6 +333,7 @@ LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def, break; } case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::PHI: break; default: diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index c108f0088d43a..dcca111e8fa70 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -618,8 +618,7 @@ void SSAIfConv::replacePHIInstrs() { if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { // We do not need the select instruction if both incoming values are // equal, but we do need a COPY. 
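The BuildMI(..., TII->get(TargetOpcode::COPY), ...) call sites rewritten to TII->buildCopy(...) here and throughout the rest of this patch all go through a TargetInstrInfo helper whose declaration is not part of this excerpt. A plausible shape, assuming it merely dispatches on getCopyOpcode() so that targets using PRED_COPY get the right opcode:

  // Sketch only - the real helper lives in TargetInstrInfo and may differ.
  MachineInstr *TargetInstrInfo::buildCopy(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           const DebugLoc &DL, Register Dest,
                                           Register Src, unsigned Flags,
                                           unsigned SubReg) const {
    return BuildMI(MBB, I, DL, get(getCopyOpcode()), Dest)
        .addReg(Src, Flags, SubReg);
  }

Some call sites later in the patch pass only the destination register and add the source operand through a MachineInstrBuilder afterwards, so an overload without Src presumably exists as well.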
- BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) - .addReg(PI.TReg); + TII->buildCopy(*Head, FirstTerm, HeadDL, DstReg, PI.TReg); } else { TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 086b4a4dcc47b..99edc925835ba 100644 --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -211,6 +211,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 252910fd94627..55d939de426e3 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -422,7 +422,7 @@ class StatepointState { LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore); TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI, - RC, &TRI); + RC, &TRI, Register()); } } @@ -431,7 +431,7 @@ class StatepointState { const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); int FI = RegToSlotIdx[Reg]; if (It != MBB->end()) { - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); return; } @@ -439,7 +439,7 @@ class StatepointState { // and then swap them. assert(!MBB->empty() && "Empty block"); --It; - TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI); + TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI, Register()); MachineInstr *Reload = It->getPrevNode(); int Dummy = 0; (void)Dummy; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 79837aa54f234..345d1e3497f9d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -1012,7 +1012,7 @@ bool CallLowering::parametersInCSRMatch( // registers. Note that getDefIgnoringCopies does not ignore copies from // physical registers. MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); - if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + if (!RegDef || !RegDef->isCopy()) { LLVM_DEBUG( dbgs() << "... Parameter was not copied into a VReg, cannot tail call.\n"); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1fea2607c061f..40f49ed406829 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -204,7 +204,7 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { return false; } bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { - if (MI.getOpcode() != TargetOpcode::COPY) + if (!MI.isCopy()) return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -4223,7 +4223,7 @@ bool CombinerHelper::matchICmpToLHSKnownBits( LLT LHSTy = MRI.getType(LHS); unsigned LHSSize = LHSTy.getSizeInBits(); unsigned DstSize = DstTy.getSizeInBits(); - unsigned Op = TargetOpcode::COPY; + unsigned Op = Builder.getTII().getCopyOpcode(); if (DstSize != LHSSize) Op = DstSize < LHSSize ? 
TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT; if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}})) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index e2c34a31d9a1b..180e900490c5a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -36,6 +36,7 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { const MachineInstr *MI = MRI.getVRegDef(R); switch (MI->getOpcode()) { case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); case TargetOpcode::G_ASSERT_ALIGN: { // TODO: Min with source @@ -200,6 +201,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, break; } case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::G_PHI: case TargetOpcode::PHI: { Known.One = APInt::getAllOnes(BitWidth); @@ -234,7 +236,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, MRI.getType(SrcReg).isValid()) { // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, - Depth + (Opcode != TargetOpcode::COPY)); + Depth + (!MI.isCopy())); Known = KnownBits::commonBits(Known, Known2); // If we reach a point where we don't know anything // just stop looking through the operands. @@ -631,7 +633,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned FirstAnswer = 1; switch (Opcode) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: { MachineOperand &Src = MI.getOperand(1); if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && MRI.getType(Src.getReg()).isValid()) { diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 3dc95e3e9df59..02b9343d0e72e 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2514,8 +2514,16 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { LLT MemTy = Info.memVT.isSimple() ? getLLTForMVT(Info.memVT.getSimpleVT()) : LLT::scalar(Info.memVT.getStoreSizeInBits()); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, MemTy, Alignment)); + + // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic + // didn't yield anything useful. 
+ MachinePointerInfo MPI; + if (Info.ptrVal) + MPI = MachinePointerInfo(Info.ptrVal, Info.offset); + else if (Info.fallbackAddressSpace) + MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + MIB.addMemOperand( + MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata())); } return true; diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 14b8a141af437..802d36c2dd4c2 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -226,7 +226,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { ReachedBegin = true; else --MII; - if (MI.getOpcode() != TargetOpcode::COPY) + if (!MI.isCopy()) continue; Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 2b9bc22bbb2f3..98afe27039f56 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7337,7 +7337,8 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { return UnableToLegalize; // FIXME: handle extension. // This can be just a plain copy. Observer.changingInstr(MI); - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY)); + const TargetInstrInfo &TII = MIRBuilder.getTII(); + MI.setDesc(TII.get(TII.getCopyOpcode())); Observer.changedInstr(MI); return Legalized; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 78a8f85e63f20..5da061fe16331 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -287,7 +287,7 @@ MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr, MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res, const SrcOp &Op) { - return buildInstr(TargetOpcode::COPY, Res, Op); + return buildInstr(getTII().getCopyOpcode(), Res, Op); } MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, @@ -511,7 +511,7 @@ MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, assert(Res.getLLTTy(*getMRI()).isScalar() == Op.getLLTTy(*getMRI()).isScalar()); - unsigned Opcode = TargetOpcode::COPY; + unsigned Opcode = getTII().getCopyOpcode(); if (Res.getLLTTy(*getMRI()).getSizeInBits() > Op.getLLTTy(*getMRI()).getSizeInBits()) Opcode = ExtOpc; @@ -1114,6 +1114,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, break; } case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: assert(DstOps.size() == 1 && "Invalid Dst"); // If the caller wants to add a subreg source it has to be done separately // so we may not have any SrcOps at this point yet. 
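The MachinePointerInfo fallback above (mirrored later in SelectionDAGBuilder) only triggers when a target's getTgtMemIntrinsic() reports an address space but no pointer value. A hypothetical target hook exercising the new field might look like this (MyTargetLowering and the intrinsic name are made up; fallbackAddressSpace is the field consumed above):

  bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                            const CallInst &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
    switch (Intrinsic) {
    case Intrinsic::my_buffer_load:        // hypothetical intrinsic
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.memVT = MVT::v4i32;
      Info.ptrVal = nullptr;               // no IR pointer to attach...
      Info.fallbackAddressSpace = 7;       // ...but the address space is known
      Info.align = Align(4);
      Info.flags = MachineMemOperand::MOLoad;
      return true;
    default:
      return false;
    }
  }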
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 07eece77143fe..1b4411bb574d0 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/RegisterBankInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -83,6 +84,7 @@ void RegBankSelect::init(MachineFunction &MF) { assert(RBI && "Cannot work without RegisterBankInfo"); MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); TPC = &getAnalysis(); if (OptMode != Mode::Fast) { MBFI = &getAnalysis(); @@ -160,9 +162,9 @@ bool RegBankSelect::repairReg( // Build the instruction used to repair, then clone it at the right // places. Avoiding buildCopy bypasses the check that Src and Dst have the // same types because the type is a placeholder when this function is called. - MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) - .addDef(Dst) - .addUse(Src); + MI = MIRBuilder.buildInstrNoInsert(TII->getCopyOpcode()) + .addDef(Dst) + .addUse(Src); LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) << '\n'); } else { diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 92368ab9beb78..662b6dd9bca30 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -73,14 +73,11 @@ Register llvm::constrainOperandRegClass( // FIXME: The copy needs to have the classes constrained for its operands. // Use operand's regbank to get the class for old register (Reg). 
if (RegMO.isUse()) { - BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), ConstrainedReg) - .addReg(Reg); + TII.buildCopy(MBB, InsertIt, InsertPt.getDebugLoc(), ConstrainedReg, Reg); } else { assert(RegMO.isDef() && "Must be a definition"); - BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), Reg) - .addReg(ConstrainedReg); + TII.buildCopy(MBB, std::next(InsertIt), InsertPt.getDebugLoc(), Reg, + ConstrainedReg); } if (GISelChangeObserver *Observer = MF.getObserver()) { Observer->changingInstr(*RegMO.getParent()); @@ -332,6 +329,7 @@ Optional getConstantVRegValWithLookThrough( VReg = MI->getOperand(1).getReg(); break; case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: VReg = MI->getOperand(1).getReg(); if (Register::isPhysicalRegister(VReg)) return None; @@ -446,7 +444,7 @@ llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) { if (!DstTy.isValid()) return None; unsigned Opc = DefMI->getOpcode(); - while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) { + while (DefMI->isCopy() || isPreISelGenericOptimizationHint(Opc)) { Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid()) @@ -751,8 +749,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, MRI.setType(LiveIn, RegTy); } - BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) - .addReg(PhysReg); + TII.buildCopy(EntryMBB, EntryMBB.begin(), DL, LiveIn, PhysReg); if (!EntryMBB.isLiveIn(PhysReg)) EntryMBB.addLiveIn(PhysReg); return LiveIn; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index d243cb5c8c58f..22716a1118489 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -417,7 +417,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstrSpan MIS(MII, MBB); // Insert spill without kill flag immediately after def. TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, - MRI.getRegClass(SrcReg), &TRI); + MRI.getRegClass(SrcReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); @@ -993,7 +993,7 @@ void InlineSpiller::insertReload(Register NewVReg, MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + MRI.getRegClass(NewVReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); @@ -1030,7 +1030,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill, if (IsRealSpill) TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + MRI.getRegClass(NewVReg), &TRI, Register()); else // Don't spill undef value. 
// Anything works for undef, in particular keeping the memory @@ -1596,7 +1596,7 @@ void HoistSpillHelper::hoistAllSpills() { MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB); MachineInstrSpan MIS(MII, BB); TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot, - MRI.getRegClass(LiveReg), &TRI); + MRI.getRegClass(LiveReg), &TRI, Register()); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII); for (const MachineInstr &MI : make_range(MIS.begin(), MII)) getVDefInterval(MI, LIS); diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 539d5e7524a6a..47173d353241c 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1743,9 +1743,8 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, return; LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); Register Reg = LI.reg(); - const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { - Register NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->cloneVirtualRegister(Reg); LiveInterval &NewLI = createEmptyInterval(NewVReg); SplitLIs.push_back(&NewLI); } diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index afc04f0045c26..5c8af456fc206 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -33,7 +33,7 @@ void LiveRangeEdit::Delegate::anchor() { } LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg, bool createSubRanges) { - Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.cloneVirtualRegister(OldReg); if (VRM) VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); @@ -53,7 +53,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(Register OldReg, } Register LiveRangeEdit::createFrom(Register OldReg) { - Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.cloneVirtualRegister(OldReg); if (VRM) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index f7684ae67d38c..7242a8fa5c278 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -639,8 +639,7 @@ MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) // No luck, create a virtual register. Register VirtReg = MRI.createVirtualRegister(RC); - BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) - .addReg(PhysReg, RegState::Kill); + TII.buildCopy(*this, I, DebugLoc(), VirtReg, PhysReg, RegState::Kill); if (!LiveIn) addLiveIn(PhysReg); return VirtReg; @@ -934,7 +933,7 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const { return Successors.size() == 1 ? Successors[0] : nullptr; } -MachineBasicBlock *MachineBasicBlock::getFallThrough() { +MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. @@ -965,8 +964,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough() { // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. 
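A large share of the mechanical churn in this patch comes from a new trailing Register parameter on storeRegToStackSlot / loadRegFromStackSlot: call sites that know which virtual register is being spilled pass it (RegAllocFast below passes VirtReg), everything else passes Register(). The TargetInstrInfo.h side is not in this excerpt, so the assumed shape is:

  // Assumed declaration; the actual hook may differ slightly.
  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   Register SrcReg, bool isKill, int FrameIndex,
                                   const TargetRegisterClass *RC,
                                   const TargetRegisterInfo *TRI,
                                   Register VReg) const;

with loadRegFromStackSlot gaining the same trailing VReg parameter.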
- if (MachineFunction::iterator(TBB) == Fallthrough || - MachineFunction::iterator(FBB) == Fallthrough) + if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough)) return &*Fallthrough; // If it's an unconditional branch to some block not the fall through, it diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 78b49c9236629..c6653ae3a6659 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -187,6 +187,7 @@ void MachineFunction::init() { RegInfo = nullptr; MFInfo = nullptr; + // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && @@ -232,6 +233,12 @@ void MachineFunction::init() { PSVManager = std::make_unique(getTarget()); } +void MachineFunction::initTargetMachineFunctionInfo( + const TargetSubtargetInfo &STI) { + assert(!MFInfo && "MachineFunctionInfo already set"); + MFInfo = Target.createMachineFunctionInfo(Allocator, F, &STI); +} + MachineFunction::~MachineFunction() { clear(); } diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 23d55a5df9f57..a610690648bc0 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -118,6 +118,11 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) { // No pre-existing machine function, create a new one. const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F); MF = new MachineFunction(F, TM, STI, NextFnNum++, *this); + MF->initTargetMachineFunctionInfo(STI); + + // MRI callback for target specific initializations. + TM.registerMachineRegisterInfoCallback(*MF); + // Update the set entry. I.first->second.reset(MF); } else { diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 721bd52448ace..bed78b122177a 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -418,9 +418,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); MachineBasicBlock::iterator At = PredB.getFirstTerminator(); const DebugLoc &DL = PredB.findDebugLoc(At); - auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg) - .addReg(RegOp.getReg(), getRegState(RegOp), - RegOp.getSubReg()); + auto Copy = TII->buildCopy(PredB, At, DL, NewReg, RegOp.getReg(), + getRegState(RegOp), RegOp.getSubReg()); Slots.insertMachineInstrInMaps(*Copy); RegOp.setReg(NewReg); RegOp.setSubReg(0); diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index e48f1beaae2be..b24c850957a3c 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -48,6 +48,7 @@ MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) RegAllocHints.reserve(256); UsedPhysRegMask.resize(NumRegs); PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); + TheDelegates.clear(); } /// setRegClass - Set the register class of the specified virtual register. 
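The MachineFunction changes above split MachineFunctionInfo creation out of init(): the owner now calls initTargetMachineFunctionInfo() and the new MRI callback right after constructing the function, as MachineModuleInfo does here and ResetMachineFunctionPass does further down. Any other code that constructs a MachineFunction by hand would presumably need the same sequence; illustrative only:

  // Hypothetical construction site, mirroring MachineModuleInfo above.
  auto *MF = new MachineFunction(F, TM, STI, NextFnNum++, MMI);
  MF->initTargetMachineFunctionInfo(STI);        // MFInfo is no longer created in init()
  TM.registerMachineRegisterInfoCallback(*MF);   // target hook for MRI delegates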
@@ -79,10 +80,10 @@ constrainRegClass(MachineRegisterInfo &MRI, Register Reg, return NewRC; } -const TargetRegisterClass *MachineRegisterInfo::constrainRegClass( - Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs) { - if (Reg.isPhysical()) - return nullptr; +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(Register Reg, + const TargetRegisterClass *RC, + unsigned MinNumRegs) { return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); } @@ -162,8 +163,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, // New virtual register number. Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = RegClass; - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteNewVirtualRegister(Reg); return Reg; } @@ -172,8 +172,7 @@ Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, Register Reg = createIncompleteVirtualRegister(Name); VRegInfo[Reg].first = VRegInfo[VReg].first; setType(Reg, getType(VReg)); - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteCloneVirtualRegister(Reg, VReg); return Reg; } @@ -189,8 +188,7 @@ MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) { // FIXME: Should we use a dummy register class? VRegInfo[Reg].first = static_cast(nullptr); setType(Reg, Ty); - if (TheDelegate) - TheDelegate->MRI_NoteNewVirtualRegister(Reg); + noteNewVirtualRegister(Reg); return Reg; } @@ -479,9 +477,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, --i; --e; } else { // Emit a copy. - BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), - TII.get(TargetOpcode::COPY), LiveIns[i].second) - .addReg(LiveIns[i].first); + TII.buildCopy(*EntryMBB, EntryMBB->begin(), DebugLoc(), + LiveIns[i].second, LiveIns[i].first); // Add the register to the entry block live-in set. EntryMBB->addLiveIn(LiveIns[i].first); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 6ef36d86891a1..e315d77382f39 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1818,7 +1818,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Verify properties of various specific instruction types switch (MI->getOpcode()) { - case TargetOpcode::COPY: { + case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &SrcOp = MI->getOperand(1); const Register SrcReg = SrcOp.getReg(); diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index c7fde45eba6a6..d75673a4f9afa 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -824,9 +824,7 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB, // We split the lifetime when we find the first use. 
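The MachineRegisterInfo changes above (TheDelegates, noteNewVirtualRegister, noteCloneVirtualRegister) suggest the single TheDelegate pointer has become a list of delegates with a separate clone notification; neither the new Delegate interface nor its registration function is shown in this excerpt, so the following is only a guess at how a target might use it:

  // Hypothetical delegate; assumes the Delegate interface gained a clone hook
  // matching noteCloneVirtualRegister() above.
  struct MyRegInfoDelegate : public MachineRegisterInfo::Delegate {
    void MRI_NoteNewVirtualRegister(Register Reg) override { /* ... */ }
    void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override {
      // e.g. copy target-specific flags from SrcReg to NewReg.
    }
  };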
if (SplitReg == 0) { SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); - BuildMI(*KernelBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), SplitReg) - .addReg(Def); + TII->buildCopy(*KernelBB, MI, MI->getDebugLoc(), SplitReg, Def); } BBJ.substituteRegister(Def, SplitReg, 0, *TRI); } @@ -1191,9 +1189,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr( UseOp.setReg(ReplaceReg); else { Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), - SplitReg) - .addReg(ReplaceReg); + TII->buildCopy(*BB, UseMI, UseMI->getDebugLoc(), SplitReg, ReplaceReg); UseOp.setReg(SplitReg); } } diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 31e37c4cd7e3e..ceabe03ceee57 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -603,9 +603,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, RC = MRI->getRegClass(UseMI->getOperand(0).getReg()); Register NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) - .addReg(DstReg, 0, SubIdx); + TII->buildCopy(*UseMBB, UseMI, UseMI->getDebugLoc(), NewVR, DstReg, 0, + SubIdx); if (UseSrcSubIdx) UseMO->setSubReg(0); @@ -1024,7 +1023,7 @@ class ExtractSubregRewriter : public Rewriter { // Get rid of the sub-register index. CopyLike.removeOperand(2); // Morph the operation into a COPY. - CopyLike.setDesc(TII.get(TargetOpcode::COPY)); + CopyLike.setDesc(TII.get(TII.getCopyOpcode())); return true; } CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); @@ -1112,6 +1111,7 @@ static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { default: return nullptr; case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: return new CopyRewriter(MI); case TargetOpcode::INSERT_SUBREG: return new InsertSubregRewriter(MI); @@ -1253,9 +1253,8 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, Register NewVReg = MRI->createVirtualRegister(DefRC); MachineInstr *NewCopy = - BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVReg) - .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + TII->buildCopy(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + NewVReg, NewSrc.Reg, 0, NewSrc.SubReg); if (Def.SubReg) { NewCopy->getOperand(0).setSubReg(Def.SubReg); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index ec27272127d6f..26cb1a90a6a51 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -594,13 +594,12 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, unsigned Reg = CS.getReg(); if (CS.isSpilledToReg()) { - BuildMI(SaveBlock, I, DebugLoc(), - TII.get(TargetOpcode::COPY), CS.getDstReg()) - .addReg(Reg, getKillRegState(true)); + TII.buildCopy(SaveBlock, I, DebugLoc(), CS.getDstReg(), Reg, + getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, - TRI); + TRI, Register()); } } } @@ -622,11 +621,12 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, for (const CalleeSavedInfo &CI : reverse(CSI)) { unsigned Reg = CI.getReg(); if (CI.isSpilledToReg()) { - BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg) - .addReg(CI.getDstReg(), getKillRegState(true)); + 
TII.buildCopy(RestoreBlock, I, DebugLoc(), Reg, CI.getDstReg(), + getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, + TRI, Register()); assert(I != RestoreBlock.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 3da1cfbb05870..0e7ad506f884c 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -527,7 +527,8 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n'); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI, + VirtReg); ++NumStores; MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator(); @@ -592,7 +593,7 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, << printReg(PhysReg, TRI) << '\n'); int FI = getStackSpaceFor(VirtReg); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI); + TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI, VirtReg); ++NumLoads; } @@ -1024,9 +1025,8 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, std::next((MachineBasicBlock::iterator)MI.getIterator()); LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to " << printReg(PrevReg, TRI) << '\n'); - BuildMI(*MBB, InsertBefore, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), PrevReg) - .addReg(LRI->PhysReg, llvm::RegState::Kill); + TII->buildCopy(*MBB, InsertBefore, MI.getDebugLoc(), PrevReg, + LRI->PhysReg, llvm::RegState::Kill); } MachineOperand &MO = MI.getOperand(OpNum); if (MO.getSubReg() && !MO.isUndef()) { diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 3310cdd697c47..9a3854c2c063e 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -317,6 +317,7 @@ unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const { } else { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. + bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = RC.GlobalPriority || (!ReverseLocalAssignment && diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 8865bcf9cd6db..46c94cda5fbe0 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1186,9 +1186,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); // Insert new copy to CopyLeftBB. 
- MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), - TII->get(TargetOpcode::COPY), IntB.reg()) - .addReg(IntA.reg()); + MachineInstr *NewCopyMI = TII->buildCopy( + *CopyLeftBB, InsPos, CopyMI.getDebugLoc(), IntB.reg(), IntA.reg()); SlotIndex NewCopyIdx = LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index 289d31be2d2d6..e4a9dc827701c 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -499,14 +499,14 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, ": Cannot scavenge register without an emergency " "spill slot!"); } - TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI, Register()); MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI, Register()); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index 0f73973c8a51c..11bdf3bb2ba8c 100644 --- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "reset-machine-function" @@ -66,6 +67,12 @@ namespace { LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n'); ++NumFunctionsReset; MF.reset(); + MF.initTargetMachineFunctionInfo(MF.getSubtarget()); + + const LLVMTargetMachine &TM = MF.getTarget(); + // MRI callback for target specific initializations. + TM.registerMachineRegisterInfoCallback(MF); + if (EmitFallbackDiag) { const Function &F = MF.getFunction(); DiagnosticInfoISelFallback DiagFallback(F); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 053f825478568..60522597f955c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1504,8 +1504,7 @@ bool FastISel::selectFreeze(const User *I) { MVT Ty = ETy.getSimpleVT(); const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); Register ResultReg = createResultReg(TyRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, Reg); updateValueMap(I, ResultReg); return true; @@ -1962,8 +1961,7 @@ Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. 
Register NewOp = createResultReg(RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, NewOp, Op); return NewOp; } } @@ -1992,8 +1990,8 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; @@ -2016,8 +2014,8 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2042,8 +2040,8 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, .addReg(Op0) .addReg(Op1) .addReg(Op2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2064,8 +2062,8 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2088,8 +2086,8 @@ Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, .addReg(Op0) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2107,8 +2105,8 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2132,8 +2130,8 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, .addReg(Op0) .addReg(Op1) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2148,8 +2146,8 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, + II.ImplicitDefs[0]); } return ResultReg; } @@ -2161,8 +2159,7 @@ Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, 
FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0, 0, Idx); + TII.buildCopy(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, ResultReg, Op0, 0, Idx); return ResultReg; } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7b5414aeb1350..3b51f5872773c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -174,8 +174,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - VRBase).addReg(SrcReg); + TII->buildCopy(*MBB, InsertPos, Node->getDebugLoc(), VRBase, SrcReg); } SDValue Op(Node, ResNo); @@ -332,8 +331,8 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); Register NewVReg = MRI->createVirtualRegister(OpRC); - BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + TII->buildCopy(*MBB, InsertPos, Op.getNode()->getDebugLoc(), NewVReg, + VReg); VReg = NewVReg; } else { assert(ConstrainedRC->isAllocatable() && @@ -399,8 +398,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { Register NewVReg = MRI->createVirtualRegister(IIRC); - BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + TII->buildCopy(*MBB, InsertPos, Op.getNode()->getDebugLoc(), NewVReg, + VReg); VReg = NewVReg; } // Turn additional physreg operands into implicit uses on non-variadic @@ -468,8 +467,7 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); Register NewReg = MRI->createVirtualRegister(RC); - BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) - .addReg(VReg); + TII->buildCopy(*MBB, InsertPos, DL, NewReg, VReg); return NewReg; } @@ -525,8 +523,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // to a copy // r1026 = copy r1024 VRBase = MRI->createVirtualRegister(TRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + TII->buildCopy(*MBB, InsertPos, Node->getDebugLoc(), VRBase, SrcReg); MRI->clearKillFlags(SrcReg); } else { // Reg may not support a SubIdx sub-register, and we may need to @@ -541,9 +538,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
- MachineInstrBuilder CopyMI = - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase); + MachineInstrBuilder CopyMI = MachineInstrBuilder( + *MBB->getParent(), + TII->buildCopy(*MBB, InsertPos, Node->getDebugLoc(), VRBase)); if (Reg.isVirtual()) CopyMI.addReg(Reg, 0, SubIdx); else @@ -618,8 +615,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); Register NewVReg = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - NewVReg).addReg(VReg); + TII->buildCopy(*MBB, InsertPos, Node->getDebugLoc(), NewVReg, VReg); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; @@ -1231,8 +1227,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), - DestReg).addReg(SrcReg); + TII->buildCopy(*MBB, InsertPos, Node->getDebugLoc(), DestReg, SrcReg); break; } case ISD::CopyFromReg: { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d8eb97a4b47ee..0172d654055d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -112,15 +112,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - const TargetLowering &TLI, unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; unsigned Reg = cast(User->getOperand(1))->getReg(); - if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) - return; - if (Register::isVirtualRegister(Reg)) return; @@ -491,8 +487,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. - const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler @@ -828,8 +823,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, break; } } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); + TII->buildCopy(*BB, InsertPos, DebugLoc(), Reg, VRI->second); } else { // Copy from physical register. assert(Pred.getReg() && "Unknown physical register!"); @@ -837,8 +831,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(Pred.getReg()); + TII->buildCopy(*BB, InsertPos, DebugLoc(), VRBase, Pred.getReg()); } break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 084f16f897554..7c45da2fc78cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4873,11 +4873,17 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, - I.getAAMetadata()); + // + // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic + // didn't yield anything useful. + MachinePointerInfo MPI; + if (Info.ptrVal) + MPI = MachinePointerInfo(Info.ptrVal, Info.offset); + else if (Info.fallbackAddressSpace) + MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, + Info.memVT, MPI, Info.align, Info.flags, + Info.size, I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5dd6cc6225573..dd385bd3432d4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1240,9 +1240,8 @@ bool SelectionDAGISel::PrepareEHLandingPad() { assert(EHPhysReg && "target lacks exception pointer register"); MBB->addLiveIn(EHPhysReg); unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); - BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), - TII->get(TargetOpcode::COPY), VReg) - .addReg(EHPhysReg, RegState::Kill); + TII->buildCopy(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), VReg, + EHPhysReg, RegState::Kill); } } return true; @@ -2198,7 +2197,7 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. // If FREEZE instruction is added later, the code below must be changed as // well. 
- CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0), + CurDAG->SelectNodeTo(N, TII->getCopyOpcode(), N->getValueType(0), N->getOperand(0)); } diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 94149f56e7035..d34a8b9c9b3ec 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -518,7 +518,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) { SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) { - const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); + const MCInstrDesc &Desc = TII.get(TII.getCopyOpcode()); bool FirstCopy = !Def.isValid(); MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc) .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy) @@ -535,14 +535,14 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg, } SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, - LaneBitmask LaneMask, MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) { - const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); + LaneBitmask LaneMask, MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + bool Late, unsigned RegIdx) { SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) { // The full vreg is copied. MachineInstr *CopyMI = - BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg); + TII.buildCopy(MBB, InsertBefore, DebugLoc(), ToReg, FromReg); return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot(); } diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 83a7063de112d..4c2b52ef1f472 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -228,9 +228,8 @@ void SwiftErrorValueTracking::propagateVRegs() { assert(!VRegs.empty() && "No predecessors? 
Is the Calling Convention correct?"); Register DestReg = UUseVReg; - BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), - DestReg) - .addReg(VRegs[0].second); + TII->buildCopy(*MBB, MBB->getFirstNonPHI(), DLoc, DestReg, + VRegs[0].second); continue; } diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 86ba57d09c2a2..62d1db4120a7f 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -444,9 +444,8 @@ void TailDuplicator::duplicateInstruction( if (NewRC == nullptr) NewRC = OrigRC; Register NewReg = MRI->createVirtualRegister(NewRC); - BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), - TII->get(TargetOpcode::COPY), NewReg) - .addReg(VI->second.Reg, 0, VI->second.SubReg); + TII->buildCopy(*PredBB, NewMI, NewMI.getDebugLoc(), NewReg, + VI->second.Reg, 0, VI->second.SubReg); LocalVRMap.erase(VI); LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); MO.setReg(NewReg); @@ -1034,10 +1033,9 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, SmallVectorImpl> &CopyInfos, SmallVectorImpl &Copies) { MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); - const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); for (auto &CI : CopyInfos) { - auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) - .addReg(CI.second.Reg, 0, CI.second.SubReg); + auto C = TII->buildCopy(*MBB, Loc, DebugLoc(), CI.first, CI.second.Reg, 0, + CI.second.SubReg); Copies.push_back(C); } } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index c14b64c18b214..645de85141315 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -642,9 +642,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, MachineBasicBlock::iterator Pos = MI; if (Flags == MachineMemOperand::MOStore) - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, + Register()); else - loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register()); return &*--Pos; } diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index ac346585b0f8f..b311301a72924 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -571,10 +571,14 @@ bool TargetRegisterInfo::getCoveringSubRegIndexes( break; } - // Try to cover as much of the remaining lanes as possible but - // as few of the already covered lanes as possible. - int Cover = (SubRegMask & LanesLeft).getNumLanes() - - (SubRegMask & ~LanesLeft).getNumLanes(); + // Do not cover already-covered lanes to avoid creating cycles + // in copy bundles (= bundle contains copies that write to the + // registers). + if ((SubRegMask & ~LanesLeft).any()) + continue; + + // Try to cover as many of the remaining lanes as possible. + const int Cover = (SubRegMask & LanesLeft).getNumLanes(); if (Cover > BestCover) { BestCover = Cover; BestIdx = Idx; diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 78bf030132c35..5dfa09fdf5d3d 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1494,8 +1494,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, #endif // Emit a copy. 
- MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), RegA); + MachineInstrBuilder MIB = MachineInstrBuilder( + *MI->getParent()->getParent(), + TII->buildCopy(*MI->getParent(), MI, MI->getDebugLoc(), RegA)); // If this operand is folding a truncation, the truncation now moves to the // copy so that the register classes remain valid for the operands. MIB.addReg(RegB, 0, SubRegB); @@ -1831,7 +1832,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { mi->getOperand(0).setSubReg(SubIdx); mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); mi->removeOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); + mi->setDesc(TII->get(TII->getCopyOpcode())); LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); // Update LiveIntervals. @@ -1919,7 +1920,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { // Insert the sub-register copy. MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) + TII->get(TII->getCopyOpcode())) .addReg(DstReg, RegState::Define, SubIdx) .add(UseMO); diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 5e8514f525e9d..ff3229b747350 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -182,9 +182,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // insert a COPY instead of simply replacing the output // with the input. const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); - BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(), - TII->get(TargetOpcode::COPY), OutputReg) - .addReg(InputReg, getRegState(Input), InputSub); + TII->buildCopy(BB, BB.getFirstNonPHI(), phi->getDebugLoc(), + OutputReg, InputReg, getRegState(Input), InputSub); } phi++->eraseFromParent(); } diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 88460971338cb..fb691dba216d4 100644 --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -122,6 +122,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { case TargetOpcode::REG_SEQUENCE: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: break; @@ -172,6 +173,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { case TargetOpcode::CFI_INSTRUCTION: case TargetOpcode::EH_LABEL: case TargetOpcode::COPY: + case TargetOpcode::PRED_COPY: case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: break; diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 069aca742da07..c386ab9352a41 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -404,6 +404,16 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { return true; } +// Returns true when all the implicit operands of the copy instruction \p MI are +// reserved registers. 
+static bool isCopyWithReservedImplicitOpnds(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + for (unsigned I = 2, E = MI.getNumOperands(); I != E; ++I) { + if (!MRI.isReserved(MI.getOperand(I).getReg())) + return false; + } + return true; +} void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) { if (!MI.isIdentityCopy()) return; @@ -424,8 +434,11 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) { // %al = COPY %al, implicit-def %eax // give us additional liveness information: The target (super-)register // must not be valid before this point. Replace the COPY with a KILL - // instruction to maintain this information. - if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) { + // instruction to maintain this information. Do not insert KILL when the + // implicit operands are all reserved registers. + if (MI.getOperand(1).isUndef() || + ((MI.getNumOperands() > 2) && + !isCopyWithReservedImplicitOpnds(MI, *MRI))) { MI.setDesc(TII->get(TargetOpcode::KILL)); LLVM_DEBUG(dbgs() << " replace by: " << MI); return; diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 6c512023f3051..95e76320729e8 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -1204,6 +1204,13 @@ Instruction *DIBuilder::insertDef(DILifetime *Lifetime, llvm::Value *Referrer, DefFn = getDefIntrin(M); trackIfUnresolved(Lifetime); + + // Ideally, the intrinsic would be able to handle any type of + // pointer. However, SelectionDAGBuilder::visitIntrinsicCall (for dbg_def) and + // InstEmitter::EmitDbgDefKill expect the intrinsic to refer directly to the + // alloca / argument and have problems handling addrspacecasts + Referrer = Referrer->stripPointerCasts(); + Value *Args[] = {MetadataAsValue::get(VMContext, Lifetime), getDbgIntrinsicValueImpl(VMContext, Referrer)}; diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 5dd114c269ccb..51828b9422bf0 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -389,6 +389,17 @@ void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef Key, addModuleFlag(Behavior, Key, Val); } +void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef Key, + Constant *Val) { + setModuleFlag(Behavior, Key, ConstantAsMetadata::get(Val)); +} + +void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef Key, + uint32_t Val) { + Type *Int32Ty = Type::getInt32Ty(Context); + setModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val)); +} + void Module::setDataLayout(StringRef Desc) { DL.reset(Desc); } diff --git a/llvm/lib/IR/TypeFinder.cpp b/llvm/lib/IR/TypeFinder.cpp index 904af7e737ccf..dbdcb78615443 100644 --- a/llvm/lib/IR/TypeFinder.cpp +++ b/llvm/lib/IR/TypeFinder.cpp @@ -176,6 +176,30 @@ void TypeFinder::incorporateMDNode(const MDNode *V) { return; } + // The operations in a DIExpr are not exposed as operands, so handle such + // nodes specifically here. 
+ if (const auto *E = dyn_cast(V)) { + for (auto &&Op : E->builder()) + visit( + makeVisitor( +#define HANDLE_OP0(NAME) [](DIOp::NAME) {}, +#include "llvm/IR/DIExprOps.def" + [&](DIOp::Referrer R) { incorporateType(R.getResultType()); }, + [&](DIOp::Arg A) { incorporateType(A.getResultType()); }, + [&](DIOp::TypeObject T) { incorporateType(T.getResultType()); }, + [&](DIOp::Constant C) { incorporateValue(C.getLiteralValue()); }, + [&](DIOp::Convert C) { incorporateType(C.getResultType()); }, + [&](DIOp::Reinterpret R) { incorporateType(R.getResultType()); }, + [&](DIOp::BitOffset B) { incorporateType(B.getResultType()); }, + [&](DIOp::ByteOffset B) { incorporateType(B.getResultType()); }, + [&](DIOp::Composite C) { incorporateType(C.getResultType()); }, + [&](DIOp::Extend) {}, [&](DIOp::AddrOf) {}, + [&](DIOp::Deref D) { incorporateType(D.getResultType()); }, + [&](DIOp::PushLane P) { incorporateType(P.getResultType()); }), + Op); + return; + } + // Look in operands for types. for (Metadata *Op : V->operands()) { if (!Op) diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 286d3ca3e2cc0..828203080710e 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1148,7 +1148,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { GV->setLinkage(GlobalValue::InternalLinkage); } - RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + RegularLTO.CombinedModule->setModuleFlag(Module::Error, "LTOPostLink", 1); if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 68ef8d60beac7..6037072d16863 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -606,7 +606,7 @@ bool LTOCodeGenerator::optimize() { this->applyScopeRestrictions(); // Write LTOPostLink flag for passes that require all the modules. - MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + MergedModule->setModuleFlag(Module::Error, "LTOPostLink", 1); // Add an appropriate DataLayout instance for this module... MergedModule->setDataLayout(TargetMach->createDataLayout()); diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index eaf22eaa73a1d..e6fe9af415b3e 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -2310,11 +2310,6 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst, assert(getCurrentSectionOnly() && "Cannot emit contents before setting section!"); - if (!MAI->usesDwarfFileAndLocDirectives()) - // Now that a machine instruction has been assembled into this section, make - // a line entry for any .loc directive that has been seen. - MCDwarfLineEntry::make(this, getCurrentSectionOnly()); - // Show the encoding in a comment if we have a code emitter. 
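A short usage sketch, illustrative only, for the Module::setModuleFlag convenience overloads added above and picked up by the LTO hunks: unlike addModuleFlag, setModuleFlag replaces an existing entry with the same key, so marking "LTOPostLink" on a module that already carries the flag does not create a conflicting duplicate under Error behavior.

  #include "llvm/IR/Module.h"
  using namespace llvm;

  void markPostLink(Module &M) {
    // Safe to call repeatedly; the new uint32_t overload wraps the value in a
    // ConstantInt and overwrites any existing "LTOPostLink" flag.
    M.setModuleFlag(Module::Error, "LTOPostLink", 1);
  }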
AddEncodingComment(Inst, STI); diff --git a/llvm/lib/OffloadArch/amdgpu/codename2offloadarch.txt b/llvm/lib/OffloadArch/amdgpu/codename2offloadarch.txt index 7df4c0a33b62c..eb1dd697ddd29 100644 --- a/llvm/lib/OffloadArch/amdgpu/codename2offloadarch.txt +++ b/llvm/lib/OffloadArch/amdgpu/codename2offloadarch.txt @@ -6,14 +6,19 @@ CYAN_SKILLFISH gfx1013 DIMGREY_CAVEFISH gfx1032 FIJI gfx803 HAWAII gfx701 +HOTPINK_BONEFISH gfx1102 NAVI10 gfx1010 NAVI12 gfx1011 NAVI14 gfx1012 NAVY_FLOUNDER gfx1031 +PINK_SARDINE gfx1103 +PLUM_BONITO gfx1100 POLARIS10 gfx803 POLARIS11 gfx803 POLARIS12 gfx803 +RAPHAEL gfx1036 RAVEN gfx902 +REMBRANDT gfx1035 RENOIR gfx90c SIENNA_CICHLID gfx1030 SPECTRE gfx700 @@ -24,9 +29,5 @@ VEGA10 gfx900 VEGA12 gfx904 VEGA20 gfx906 VEGAM gfx803 -YELLOW_CARP gfx1035 -PLUM_BONITO gfx1100 WHEAT_NAS gfx1101 -HOTPINK_BONEFISH gfx1102 -PINK_SARDINE gfx1103 -PHOENIX gfx1103 +YELLOW_CARP gfx1035 diff --git a/llvm/lib/OffloadArch/amdgpu/pciid2codename.txt b/llvm/lib/OffloadArch/amdgpu/pciid2codename.txt index b77b9471eb22a..e76bae21d8c61 100644 --- a/llvm/lib/OffloadArch/amdgpu/pciid2codename.txt +++ b/llvm/lib/OffloadArch/amdgpu/pciid2codename.txt @@ -177,4 +177,6 @@ 1002:743F 0000 0000 BEIGE_GOBY : BEIGE_GOBY 1002:164D 0000 0000 YELLOW_CARP : YELLOW_CARP 1002:1681 0000 0000 YELLOW_CARP : YELLOW_CARP -1002:DEBF 0000 0000 PLUM_BONITO : PLUM_BONITO +1002:744C 0000 0000 PLUM_BONITO : PLUM_BONITO +1002:164d 0000 0000 REMBRANDT : Rembrandt +1002:164e 0000 0000 RAPHAEL : Raphael diff --git a/llvm/lib/OffloadArch/generated_offload_arch.h b/llvm/lib/OffloadArch/generated_offload_arch.h index 96205c15197b7..456e0e59f268d 100644 --- a/llvm/lib/OffloadArch/generated_offload_arch.h +++ b/llvm/lib/OffloadArch/generated_offload_arch.h @@ -13,6 +13,7 @@ typedef enum { AOT_GFX1033, AOT_GFX1034, AOT_GFX1035, + AOT_GFX1036, AOT_GFX1100, AOT_GFX1101, AOT_GFX1102, @@ -45,14 +46,19 @@ typedef enum { AOT_CN_DIMGREY_CAVEFISH, AOT_CN_FIJI, AOT_CN_HAWAII, + AOT_CN_HOTPINK_BONEFISH, AOT_CN_NAVI10, AOT_CN_NAVI12, AOT_CN_NAVI14, AOT_CN_NAVY_FLOUNDER, + AOT_CN_PINK_SARDINE, + AOT_CN_PLUM_BONITO, AOT_CN_POLARIS10, AOT_CN_POLARIS11, AOT_CN_POLARIS12, + AOT_CN_RAPHAEL, AOT_CN_RAVEN, + AOT_CN_REMBRANDT, AOT_CN_RENOIR, AOT_CN_SIENNA_CICHLID, AOT_CN_SPECTRE, @@ -63,12 +69,8 @@ typedef enum { AOT_CN_VEGA12, AOT_CN_VEGA20, AOT_CN_VEGAM, - AOT_CN_YELLOW_CARP, - AOT_CN_PLUM_BONITO, AOT_CN_WHEAT_NAS, - AOT_CN_HOTPINK_BONEFISH, - AOT_CN_PINK_SARDINE, - AOT_CN_PHOENIX, + AOT_CN_YELLOW_CARP, AOT_CN_K4000, AOT_CN_K4200, AOT_CN_GTX750, @@ -108,14 +110,19 @@ extern const AOT_CODENAME_ID_TO_STRING AOT_CODENAMES[] = { {AOT_CN_DIMGREY_CAVEFISH, "DIMGREY_CAVEFISH"}, {AOT_CN_FIJI, "FIJI"}, {AOT_CN_HAWAII, "HAWAII"}, + {AOT_CN_HOTPINK_BONEFISH, "HOTPINK_BONEFISH"}, {AOT_CN_NAVI10, "NAVI10"}, {AOT_CN_NAVI12, "NAVI12"}, {AOT_CN_NAVI14, "NAVI14"}, {AOT_CN_NAVY_FLOUNDER, "NAVY_FLOUNDER"}, + {AOT_CN_PINK_SARDINE, "PINK_SARDINE"}, + {AOT_CN_PLUM_BONITO, "PLUM_BONITO"}, {AOT_CN_POLARIS10, "POLARIS10"}, {AOT_CN_POLARIS11, "POLARIS11"}, {AOT_CN_POLARIS12, "POLARIS12"}, + {AOT_CN_RAPHAEL, "RAPHAEL"}, {AOT_CN_RAVEN, "RAVEN"}, + {AOT_CN_REMBRANDT, "REMBRANDT"}, {AOT_CN_RENOIR, "RENOIR"}, {AOT_CN_SIENNA_CICHLID, "SIENNA_CICHLID"}, {AOT_CN_SPECTRE, "SPECTRE"}, @@ -126,12 +133,8 @@ extern const AOT_CODENAME_ID_TO_STRING AOT_CODENAMES[] = { {AOT_CN_VEGA12, "VEGA12"}, {AOT_CN_VEGA20, "VEGA20"}, {AOT_CN_VEGAM, "VEGAM"}, - {AOT_CN_YELLOW_CARP, "YELLOW_CARP"}, - {AOT_CN_PLUM_BONITO, "PLUM_BONITO"}, {AOT_CN_WHEAT_NAS, "WHEAT_NAS"}, - {AOT_CN_HOTPINK_BONEFISH, 
"HOTPINK_BONEFISH"}, - {AOT_CN_PINK_SARDINE, "PINK_SARDINE"}, - {AOT_CN_PHOENIX, "PHOENIX"}, + {AOT_CN_YELLOW_CARP, "YELLOW_CARP"}, {AOT_CN_K4000, "k4000"}, {AOT_CN_K4200, "k4200"}, {AOT_CN_GTX750, "gtx750"}, @@ -156,6 +159,7 @@ extern const AOT_OFFLOADARCH_TO_STRING AOT_OFFLOADARCHS[] = { {AOT_GFX1033, "gfx1033"}, {AOT_GFX1034, "gfx1034"}, {AOT_GFX1035, "gfx1035"}, + {AOT_GFX1036, "gfx1036"}, {AOT_GFX1100, "gfx1100"}, {AOT_GFX1101, "gfx1101"}, {AOT_GFX1102, "gfx1102"}, @@ -214,7 +218,9 @@ extern const AOT_TABLE_ENTRY AOT_TABLE[] = { { 0x1002, 0x1638, AOT_CN_RENOIR, AOT_GFX90C }, { 0x1002, 0x163F, AOT_CN_VANGOGH, AOT_GFX1033 }, { 0x1002, 0x164C, AOT_CN_RENOIR, AOT_GFX90C }, +{ 0x1002, 0x164d, AOT_CN_REMBRANDT, AOT_GFX1035 }, { 0x1002, 0x164D, AOT_CN_YELLOW_CARP, AOT_GFX1035 }, +{ 0x1002, 0x164e, AOT_CN_RAPHAEL, AOT_GFX1036 }, { 0x1002, 0x1681, AOT_CN_YELLOW_CARP, AOT_GFX1035 }, { 0x1002, 0x66A0, AOT_CN_VEGA20, AOT_GFX906 }, { 0x1002, 0x66A1, AOT_CN_VEGA20, AOT_GFX906 }, @@ -354,12 +360,12 @@ extern const AOT_TABLE_ENTRY AOT_TABLE[] = { { 0x1002, 0x7422, AOT_CN_BEIGE_GOBY, AOT_GFX1034 }, { 0x1002, 0x7423, AOT_CN_BEIGE_GOBY, AOT_GFX1034 }, { 0x1002, 0x743F, AOT_CN_BEIGE_GOBY, AOT_GFX1034 }, +{ 0x1002, 0x744C, AOT_CN_PLUM_BONITO, AOT_GFX1100 }, { 0x1002, 0x9870, AOT_CN_CARRIZO, AOT_GFX801 }, { 0x1002, 0x9874, AOT_CN_CARRIZO, AOT_GFX801 }, { 0x1002, 0x9875, AOT_CN_CARRIZO, AOT_GFX801 }, { 0x1002, 0x9876, AOT_CN_CARRIZO, AOT_GFX801 }, { 0x1002, 0x9877, AOT_CN_CARRIZO, AOT_GFX801 }, -{ 0x1002, 0xDEBF, AOT_CN_PLUM_BONITO, AOT_GFX1100 }, { 0x10de, 0x0f02, AOT_CN_GT730, AOT_SM_35 }, { 0x10de, 0x0f06, AOT_CN_GT730, AOT_SM_35 }, { 0x10de, 0x0fc9, AOT_CN_GT730, AOT_SM_35 }, diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 462a86273a4ed..3986c9103754d 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2500,7 +2500,7 @@ class VersionPrinter { #ifdef PACKAGE_VENDOR OS << PACKAGE_VENDOR << " "; #else - OS << "AOMP-15.0-61 (http://github.com/ROCm-Developer-Tools/aomp):\n Source ID:15.0-61-595b0d8133fafef5742f7d39f8e6a07b31afff56\n "; + OS << "LLVM (http://llvm.org/):\n "; #endif OS << PACKAGE_NAME << " version " << PACKAGE_VERSION << "\n "; #if LLVM_IS_DEBUG_BUILD diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bc67fa20c60d1..201182b1f0130 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -611,7 +611,7 @@ void AArch64FrameLowering::resetCFIToInitialState( BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); // Flip the RA sign state. 
- if (MFI.shouldSignReturnAddress()) { + if (MFI.shouldSignReturnAddress(MF)) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); } @@ -1363,7 +1363,7 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, .addImm(-8) .setMIFlag(MachineInstr::FrameDestroy); - if (MF.getInfo()->needsAsyncDwarfUnwindInfo()) { + if (MF.getInfo()->needsAsyncDwarfUnwindInfo(MF)) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18)); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1382,7 +1382,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); - bool EmitCFI = AFI->needsDwarfUnwindInfo(); + bool EmitCFI = AFI->needsDwarfUnwindInfo(MF); bool HasFP = hasFP(MF); bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; @@ -1402,9 +1402,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const auto &MFnI = *MF.getInfo(); if (needsShadowCallStackPrologueEpilogue(MF)) emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI, - MFnI.needsDwarfUnwindInfo()); + MFnI.needsDwarfUnwindInfo(MF)); - if (MFnI.shouldSignReturnAddress()) { + if (MFnI.shouldSignReturnAddress(MF)) { unsigned PACI; if (MFnI.shouldSignWithBKey()) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) @@ -1876,7 +1876,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB, bool NeedsWinCFI, bool *HasWinCFI) { const auto &MFI = *MF.getInfo(); - if (!MFI.shouldSignReturnAddress()) + if (!MFI.shouldSignReturnAddress(MF)) return; const AArch64Subtarget &Subtarget = MF.getSubtarget(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -1936,7 +1936,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool NeedsWinCFI = needsWinCFI(MF); - bool EmitCFI = MF.getInfo()->needsAsyncDwarfUnwindInfo(); + bool EmitCFI = + MF.getInfo()->needsAsyncDwarfUnwindInfo(MF); bool HasWinCFI = false; bool IsFunclet = false; auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); @@ -3748,11 +3749,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, EndOffset = Instr.Offset + Instr.Size; } + const MachineFunction *MF = MBB->getParent(); // Multiple FP/SP updates in a loop cannot be described by CFI instructions. 
- TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ - !MBB->getParent() - ->getInfo() - ->needsAsyncDwarfUnwindInfo()); + TSE.emitCode( + InsertI, TFI, /*TryMergeSPUpdate = */ + !MF->getInfo()->needsAsyncDwarfUnwindInfo(*MF)); return InsertI; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index f558cf2c2cbf6..1dc3980f160a4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3783,10 +3783,12 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, .addMemOperand(MMO); } -void AArch64InstrInfo::storeRegToStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3937,10 +3939,12 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, .addMemOperand(MMO); } -void AArch64InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); @@ -4505,10 +4509,10 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, - getRegClass(SrcReg), &TRI); + getRegClass(SrcReg), &TRI, Register()); else loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, - getRegClass(DstReg), &TRI); + getRegClass(DstReg), &TRI, Register()); return &*--InsertPt; } @@ -4554,7 +4558,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( if (unsigned WidenedSrcReg = TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), - FrameIndex, SpillRC, &TRI); + FrameIndex, SpillRC, &TRI, Register()); return &*--InsertPt; } } @@ -4589,7 +4593,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); - loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); + loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI, + Register()); MachineInstr &LoadMI = *--InsertPt; MachineOperand &LoadDst = LoadMI.getOperand(0); assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); @@ -7784,7 +7789,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(AArch64::SP, RegState::InternalRead); MI.setMIFlag(MachineInstr::FrameSetup); - if (MF.getInfo()->needsDwarfUnwindInfo()) { + if (MF.getInfo()->needsDwarfUnwindInfo(MF)) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); BuildMI(MBB, MBBPAC, DebugLoc(), 
TII->get(AArch64::CFI_INSTRUCTION)) @@ -7883,7 +7888,7 @@ void AArch64InstrInfo::buildOutlinedFrame( .addImm(-16); It = MBB.insert(It, STRXpre); - if (MF.getInfo()->needsDwarfUnwindInfo()) { + if (MF.getInfo()->needsDwarfUnwindInfo(MF)) { const TargetSubtargetInfo &STI = MF.getSubtarget(); const MCRegisterInfo *MRI = STI.getRegisterInfo(); unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 96e16b0d1ee93..1057b6255e730 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -182,12 +182,14 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; // This tells target independent code that it is okay to pass instructions // with subreg operands to foldMemoryOperandImpl. diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index 469e1448602c0..961a19317d666 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -66,12 +66,12 @@ static std::pair GetSignReturnAddress(const Function &F) { return {true, false}; } -static bool ShouldSignWithBKey(const Function &F, const MachineFunction &MF) { +static bool ShouldSignWithBKey(const Function &F, const AArch64Subtarget &STI) { if (!F.hasFnAttribute("sign-return-address-key")) { if (const auto *BKey = mdconst::extract_or_null( F.getParent()->getModuleFlag("sign-return-address-with-bkey"))) return BKey->getZExtValue(); - if (MF.getTarget().getTargetTriple().isOSWindows()) + if (STI.getTargetTriple().isOSWindows()) return true; return false; } @@ -82,15 +82,14 @@ static bool ShouldSignWithBKey(const Function &F, const MachineFunction &MF) { return Key.equals_insensitive("b_key"); } -AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF_) : MF(&MF_) { +AArch64FunctionInfo::AArch64FunctionInfo(const Function &F, + const AArch64Subtarget *STI) { // If we already know that the function doesn't have a redzone, set // HasRedZone here. 
- if (MF->getFunction().hasFnAttribute(Attribute::NoRedZone)) + if (F.hasFnAttribute(Attribute::NoRedZone)) HasRedZone = false; - - const Function &F = MF->getFunction(); std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F); - SignWithBKey = ShouldSignWithBKey(F, *MF); + SignWithBKey = ShouldSignWithBKey(F, *STI); // TODO: skip functions that have no instrumented allocas for optimization IsMTETagged = F.hasFnAttribute(Attribute::SanitizeMemTag); @@ -112,9 +111,7 @@ MachineFunctionInfo *AArch64FunctionInfo::clone( BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) const { - AArch64FunctionInfo *InfoClone = DestMF.cloneInfo(*this); - InfoClone->MF = &DestMF; - return InfoClone; + return DestMF.cloneInfo(*this); } bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const { @@ -125,27 +122,30 @@ bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const { return SpillsLR; } -bool AArch64FunctionInfo::shouldSignReturnAddress() const { +bool AArch64FunctionInfo::shouldSignReturnAddress( + const MachineFunction &MF) const { return shouldSignReturnAddress(llvm::any_of( - MF->getFrameInfo().getCalleeSavedInfo(), + MF.getFrameInfo().getCalleeSavedInfo(), [](const auto &Info) { return Info.getReg() == AArch64::LR; })); } -bool AArch64FunctionInfo::needsDwarfUnwindInfo() const { +bool AArch64FunctionInfo::needsDwarfUnwindInfo( + const MachineFunction &MF) const { if (!NeedsDwarfUnwindInfo) - NeedsDwarfUnwindInfo = MF->needsFrameMoves() && - !MF->getTarget().getMCAsmInfo()->usesWindowsCFI(); + NeedsDwarfUnwindInfo = MF.needsFrameMoves() && + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); return *NeedsDwarfUnwindInfo; } -bool AArch64FunctionInfo::needsAsyncDwarfUnwindInfo() const { +bool AArch64FunctionInfo::needsAsyncDwarfUnwindInfo( + const MachineFunction &MF) const { if (!NeedsAsyncDwarfUnwindInfo) { - const Function &F = MF->getFunction(); + const Function &F = MF.getFunction(); // The check got "minsize" is because epilogue unwind info is not emitted // (yet) for homogeneous epilogues, outlined functions, and functions // outlined from. - NeedsAsyncDwarfUnwindInfo = needsDwarfUnwindInfo() && + NeedsAsyncDwarfUnwindInfo = needsDwarfUnwindInfo(MF) && F.getUWTableKind() == UWTableKind::Async && !F.hasMinSize(); } diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index c11506c898fa9..5e4c5926c371a 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -31,14 +31,12 @@ namespace yaml { struct AArch64FunctionInfo; } // end namespace yaml +class AArch64Subtarget; class MachineInstr; /// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and /// contains private AArch64-specific information for each MachineFunction. class AArch64FunctionInfo final : public MachineFunctionInfo { - /// Backreference to the machine function. - MachineFunction *MF; - /// Number of bytes of arguments this function has on the stack. If the callee /// is expected to restore the argument stack this should be a multiple of 16, /// all usable during a tail call. 
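With the MachineFunction back-pointer removed from AArch64FunctionInfo, construction is keyed off the IR Function and the subtarget, and the pointer-auth and unwind queries take the MachineFunction explicitly at each call site. A sketch of the caller pattern the surrounding hunks converge on; the helper below is hypothetical and only illustrates the shape of the calls:

  static void emitReturnAddressAuthIfNeeded(MachineFunction &MF,
                                            MachineBasicBlock &MBB) {
    const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
    // Both queries now receive MF explicitly instead of using a cached pointer.
    if (!AFI->shouldSignReturnAddress(MF))
      return;
    if (AFI->needsDwarfUnwindInfo(MF)) {
      // ... emit the CFI negate-RA-state directive here ...
    }
  }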
@@ -199,7 +197,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { mutable Optional NeedsAsyncDwarfUnwindInfo; public: - explicit AArch64FunctionInfo(MachineFunction &MF); + AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI); MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, @@ -433,7 +431,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { CalleeSaveBaseToFrameRecordOffset = Offset; } - bool shouldSignReturnAddress() const; + bool shouldSignReturnAddress(const MachineFunction &MF) const; bool shouldSignReturnAddress(bool SpillsLR) const; bool shouldSignWithBKey() const { return SignWithBKey; } @@ -451,8 +449,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { } int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } - bool needsDwarfUnwindInfo() const; - bool needsAsyncDwarfUnwindInfo() const; + bool needsDwarfUnwindInfo(const MachineFunction &MF) const; + bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const; private: // Hold the lists of LOHs. diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td index e378b043d37e6..d34d567f961b7 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -205,6 +205,7 @@ def : ReadAdvance; +def : InstRW<[WriteI], (instrs PRED_COPY)>; //--- // Vector Loads diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td index 141cc6b79c8b8..7998119b41bd6 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -270,6 +270,7 @@ def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>; def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>; def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; //--- // Vector Loads - 64-bit per cycle //--- diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index 8ce2293740000..992092d9b6633 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -132,6 +132,7 @@ def : ReadAdvance; // ----------------------------------------------------------------------------- def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // Branch Instructions diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index dec56e7f8d38e..c35fcbba4a668 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -612,6 +612,7 @@ def : InstRW<[WriteI], "CSNEG(W|X)r")>; def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // ALU, extend and/or shift def : WriteRes { diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td index b8d5a70d7ec64..60687a13742bf 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td @@ -687,6 +687,7 @@ def : ReadAdvance; // Specialising the scheduling model further for Ampere-1. 
def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>; +def : InstRW<[Ampere1Write_1cyc_1AB], (instrs PRED_COPY)>; // Branch instructions def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index e2d916954060d..f0b713fe19fb5 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -127,7 +127,7 @@ def WriteMov : SchedWriteVariant<[ SchedVar, SchedVar, SchedVar]>; -def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; +def : InstRW<[WriteMov], (instrs COPY,PRED_COPY,ORRXrr,ADDXrr)>; // Move non-zero immediate is an integer ALU op. // MOVN,MOVZ,MOVK diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index f2863f5a8e3b6..1b3c21567ad82 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -507,6 +507,7 @@ def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>; // Move instructions. def : InstRW<[M3WriteCOPY], (instrs COPY)>; +def : InstRW<[M3WriteCOPY], (instrs PRED_COPY)>; def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>; def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index ab1e680f9e990..f6b2d4f241f5a 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -604,6 +604,7 @@ def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>; // Move instructions. def : InstRW<[M4WriteCOPY], (instrs COPY)>; +def : InstRW<[M4WriteCOPY], (instrs PRED_COPY)>; def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>; def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index ae0b2b3eaeb63..c3ee57057d0f2 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -651,6 +651,7 @@ def : InstRW<[M5WriteA1X], (instrs CSELXr, CSINCXr, CSINVXr, CSNEGXr)>; // Move instructions. def : InstRW<[M5WriteCOPY], (instrs COPY)>; +def : InstRW<[M5WriteCOPY], (instrs PRED_COPY)>; def : InstRW<[M5WriteZ0], (instrs ADR, ADRP)>; def : InstRW<[M5WriteZ0], (instregex "^MOV[NZ][WX]i$")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td index a3a038f869fbb..84c6090699898 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -580,6 +580,7 @@ def FalkorWr_STRro : SchedWriteVariant<[ // FIXME: This could be better modeled by looking at the regclasses of the operands. 
def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs PRED_COPY)>; // SIMD Floating-point Instructions // ----------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td index 3551066ee7c35..1e9abd788ec23 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -131,6 +131,7 @@ def : ReadAdvance; // ----------------------------------------------------------------------------- def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // Detailed Refinedments diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index c472de8df2f8d..3334ffb8754d5 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -612,6 +612,7 @@ def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, // ----------------------------------------------------------------------------- def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // Branch Instructions // ----------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td index 542d9afb59753..3da12ff01c52c 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td @@ -122,6 +122,7 @@ def : ReadAdvance; def : ReadAdvance; def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // Detailed Refinements //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td index 8b380ae0e8f3c..c70ce1eab42ea 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td @@ -266,6 +266,7 @@ def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>; // Miscellaneous //--- def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; //--- // Vector Loads diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index cdafa33da0548..075fc4d6c79b6 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -436,6 +436,7 @@ def : InstRW<[WriteI], "CSNEG(W|X)r")>; def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // ALU, extend and/or shift def : WriteRes { diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td index b7d337dfa76dc..7e3df38521288 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td +++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -696,6 +696,7 @@ def : InstRW<[WriteI], "CSNEG(W|X)r")>; def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[WriteI], (instrs PRED_COPY)>; // ALU, extend and/or shift def : WriteRes { diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 329f5b433f239..ba2e644d9f9d7 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -828,6 +828,13 @@ void 
AArch64PassConfig::addPreEmitPass2() { addPass(createUnpackMachineBundles(nullptr)); } +MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return AArch64FunctionInfo::create( + Allocator, F, static_cast(STI)); +} + yaml::MachineFunctionInfo * AArch64TargetMachine::createDefaultFuncInfoYAML() const { return new yaml::AArch64FunctionInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index beb109502ff9e..ed66bb305520f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -47,6 +47,10 @@ class AArch64TargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 531344e376c39..8b76c4e01dfa7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,6 +49,7 @@ FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); FunctionPass *createAMDGPUUseNativeCallsPass(); +ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); FunctionPass *createAMDGPULateCodeGenPreparePass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); @@ -176,8 +177,8 @@ extern char &SIShrinkInstructionsID; void initializeSIFixSGPRCopiesPass(PassRegistry &); extern char &SIFixSGPRCopiesID; -void initializeSIFixVGPRCopiesPass(PassRegistry &); -extern char &SIFixVGPRCopiesID; +void initializeSISimplifyPredicatedCopiesPass(PassRegistry &); +extern char &SISimplifyPredicatedCopiesID; void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; @@ -293,6 +294,9 @@ extern char &AMDGPUAnnotateUniformValuesPassID; void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; +void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); +extern char &AMDGPURemoveIncompatibleFunctionsID; + void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); extern char &AMDGPULateCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 9fd3eef9efaaa..67f2b180c3315 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -804,12 +804,6 @@ def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < "Hardware automatically inserts waitcnt before barrier" >; -def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier", - "BackOffBarrier", - "true", - "Hardware supports backing off s_barrier if an exception occurs" ->; - def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", "HasTrigReducedRange", "true", @@ -1120,8 +1114,7 @@ def FeatureISAVersion9_0_A : FeatureSet< FeatureMadMacF32Insts, FeatureSupportsSRAMECC, FeaturePackedTID, - FullRate64Ops, - FeatureBackOffBarrier]>; + FullRate64Ops]>; def FeatureISAVersion9_0_C : FeatureSet< [FeatureGFX9, @@ -1159,8 +1152,7 @@ def FeatureISAVersion9_4_0 : FeatureSet< FeatureSupportsSRAMECC, FeaturePackedTID, 
FeatureArchitectedFlatScratch, - FullRate64Ops, - FeatureBackOffBarrier]>; + FullRate64Ops]>; // TODO: Organize more features into groups. def FeatureGroup { @@ -1195,8 +1187,7 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK, - FeatureBackOffBarrier])>; + FeatureSupportsXNACK])>; def FeatureISAVersion10_1_1 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1218,8 +1209,7 @@ def FeatureISAVersion10_1_1 : FeatureSet< FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK, - FeatureBackOffBarrier])>; + FeatureSupportsXNACK])>; def FeatureISAVersion10_1_2 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1241,8 +1231,7 @@ def FeatureISAVersion10_1_2 : FeatureSet< FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK, - FeatureBackOffBarrier])>; + FeatureSupportsXNACK])>; def FeatureISAVersion10_1_3 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1260,8 +1249,7 @@ def FeatureISAVersion10_1_3 : FeatureSet< FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK, - FeatureBackOffBarrier])>; + FeatureSupportsXNACK])>; def FeatureISAVersion10_3_0 : FeatureSet< [FeatureGFX10, @@ -1278,8 +1266,7 @@ def FeatureISAVersion10_3_0 : FeatureSet< FeatureNSAEncoding, FeatureNSAMaxSize13, FeatureWavefrontSize32, - FeatureShaderCyclesRegister, - FeatureBackOffBarrier]>; + FeatureShaderCyclesRegister]>; def FeatureISAVersion11_Common : FeatureSet< [FeatureGFX11, @@ -1298,8 +1285,7 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard, - FeatureBackOffBarrier]>; + FeatureVcmpxPermlaneHazard]>; def FeatureISAVersion11_0_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index be08b7f721051..61edf31462f56 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -88,17 +88,7 @@ extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() { AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) { - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { - if (isHsaAbiVersion2(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); - } else if (isHsaAbiVersion3(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); - } else if (isHsaAbiVersion5(getGlobalSTI())) { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5()); - } else { - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); - } - } + assert(OutStreamer && "AsmPrinter constructed without streamer"); } StringRef AMDGPUAsmPrinter::getPassName() const { @@ -131,7 +121,7 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { TM.getTargetTriple().getOS() != Triple::AMDPAL) return; - if (isHsaAbiVersion3AndAbove(getGlobalSTI())) + if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3) getTargetStreamer()->EmitDirectiveAMDGCNTarget(); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) @@ -140,7 +130,7 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - if (isHsaAbiVersion3AndAbove(getGlobalSTI())) + if (CodeObjectVersion >= 
AMDGPU::AMDHSA_COV3) return; // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. @@ -163,7 +153,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { return; if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - isHsaAbiVersion2(getGlobalSTI())) + CodeObjectVersion == AMDGPU::AMDHSA_COV2) getTargetStreamer()->EmitISAVersion(); // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). @@ -223,7 +213,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { if (!MFI.isEntryFunction()) return; - if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) && + if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { amd_kernel_code_t KernelCode; @@ -241,7 +231,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { return; if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - isHsaAbiVersion2(getGlobalSTI())) + CodeObjectVersion == AMDGPU::AMDHSA_COV2) return; auto &Streamer = getTargetStreamer()->getStreamer(); @@ -269,14 +259,15 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { IsaInfo::getNumExtraSGPRs(&STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), - CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, + CodeObjectVersion); Streamer.popSection(); } void AMDGPUAsmPrinter::emitFunctionEntryLabel() { if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - isHsaAbiVersion3AndAbove(getGlobalSTI())) { + CodeObjectVersion >= AMDGPU::AMDHSA_COV3) { AsmPrinter::emitFunctionEntryLabel(); return; } @@ -346,6 +337,30 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { AsmPrinter::emitGlobalVariable(GV); } +bool AMDGPUAsmPrinter::doInitialization(Module &M) { + CodeObjectVersion = AMDGPU::getCodeObjectVersion(M); + + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { + switch (CodeObjectVersion) { + case AMDGPU::AMDHSA_COV2: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); + break; + case AMDGPU::AMDHSA_COV3: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); + break; + case AMDGPU::AMDHSA_COV4: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); + break; + case AMDGPU::AMDHSA_COV5: + HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5()); + break; + default: + report_fatal_error("Unexpected code object version"); + } + } + return AsmPrinter::doInitialization(M); +} + bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Pad with s_code_end to help tools and guard against instruction prefetch // causing stale data in caches. 
Arguably this should be done by the linker, @@ -396,7 +411,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; } - if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; } @@ -418,9 +433,8 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( } if (CurrentProgramInfo.DynamicCallStack && - AMDGPU::getAmdhsaCodeObjectVersion() >= 5) { + CodeObjectVersion >= AMDGPU::AMDHSA_COV5) KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; - } return KernelCodeProperties; } @@ -641,7 +655,7 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) { // In the beginning all features are either 'Any' or 'NotSupported', // depending on global target features. This will cover empty modules. getTargetStreamer()->initializeTargetID( - *getGlobalSTI(), getGlobalSTI()->getFeatureString()); + *getGlobalSTI(), getGlobalSTI()->getFeatureString(), CodeObjectVersion); // If module is empty, we are done. if (M.empty()) @@ -918,7 +932,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // anything to disable it if we know the stack isn't used here. We may still // have emitted code reading it to initialize scratch, but if that's unused // reading garbage should be OK. - const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0; + const bool EnablePrivateSegment = + ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack; ProgInfo.ComputePGMRSrc2 = S_00B84C_SCRATCH_EN(EnablePrivateSegment) | S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | @@ -1117,7 +1132,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (MFI->hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) + if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; if (MFI->hasKernargSegmentPtr()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 2881b8d7bccaa..b162a7f69cc3a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -39,6 +39,7 @@ struct kernel_descriptor_t; class AMDGPUAsmPrinter final : public AsmPrinter { private: + unsigned CodeObjectVersion; void initializeTargetID(const Module &M); AMDGPUResourceUsageAnalysis *ResourceUsage; @@ -92,6 +93,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { AMDGPUTargetStreamer* getTargetStreamer() const; + bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 22cda5c120c8a..215bb0ef758b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -51,8 +51,8 @@ static constexpr std::pair= AMDGPU::AMDHSA_COV5); return QUEUE_PTR; case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: @@ -92,11 +92,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, // Under V5, we need implicitarg_ptr + offsets to access private_base or // shared_base. For pre-V5, however, need to access them through queue_ptr + // offsets. 
- return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR; + return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR : + QUEUE_PTR; case Intrinsic::trap: if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4. - return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR; - NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5. + return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT : + QUEUE_PTR; + NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5); return QUEUE_PTR; default: return NOT_IMPLICIT_INPUT; @@ -132,7 +134,9 @@ class AMDGPUInformationCache : public InformationCache { AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, BumpPtrAllocator &Allocator, SetVector *CGSCC, TargetMachine &TM) - : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} + : InformationCache(M, AG, Allocator, CGSCC), TM(TM), + CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {} + TargetMachine &TM; enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; @@ -149,15 +153,11 @@ class AMDGPUInformationCache : public InformationCache { return ST.supportsGetDoorbellID(); } - std::pair getFlatWorkGroupSizes(const Function &F) { - const GCNSubtarget &ST = TM.getSubtarget(F); - return ST.getFlatWorkGroupSizes(F); - } + ; - std::pair - getMaximumFlatWorkGroupRange(const Function &F) { - const GCNSubtarget &ST = TM.getSubtarget(F); - return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; + /// Get code object version. + unsigned getCodeObjectVersion() const { + return CodeObjectVersion; } private: @@ -216,6 +216,7 @@ class AMDGPUInformationCache : public InformationCache { private: /// Used to determine if the Constant needs the queue pointer. DenseMap ConstantStatus; + const unsigned CodeObjectVersion; }; struct AAAMDAttributes : public StateWrapper< @@ -405,6 +406,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { auto &InfoCache = static_cast(A.getInfoCache()); bool HasApertureRegs = InfoCache.hasApertureRegs(*F); bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F); + unsigned COV = InfoCache.getCodeObjectVersion(); for (Function *Callee : AAEdges.getOptimisticEdges()) { Intrinsic::ID IID = Callee->getIntrinsicID(); @@ -418,7 +420,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { bool NonKernelOnly = false; ImplicitArgumentMask AttrMask = intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, - HasApertureRegs, SupportsGetDoorbellID); + HasApertureRegs, SupportsGetDoorbellID, COV); if (AttrMask != NOT_IMPLICIT_INPUT) { if ((IsNonEntryFunc || !NonKernelOnly)) removeAssumedBits(AttrMask); @@ -432,29 +434,29 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) { // Under V5, we need implicitarg_ptr + offsets to access private_base or // shared_base. We do not actually need queue_ptr. 
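Throughout these AMDGPU hunks the code-object version is now read per module via AMDGPU::getCodeObjectVersion(M) and threaded into the asm printer, attributor, and call lowering rather than taken from a global setting. A hedged sketch of how a module would typically carry that information; the flag name and the times-100 encoding are assumptions based on common AMDGPU practice and are not spelled out in this patch:

  #include "llvm/IR/Module.h"
  using namespace llvm;

  void tagCodeObjectVersion(Module &M, unsigned MajorVersion) {
    // Assumed convention: "amdgpu_code_object_version" stores 100 * version,
    // so code object v5 would be recorded as 500.
    M.setModuleFlag(Module::Error, "amdgpu_code_object_version",
                    MajorVersion * 100);
  }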
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) + if (COV >= 5) removeAssumedBits(IMPLICIT_ARG_PTR); else removeAssumedBits(QUEUE_PTR); } - if (funcRetrievesMultigridSyncArg(A)) { + if (funcRetrievesMultigridSyncArg(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "multigrid_sync_arg needs implicitarg_ptr"); removeAssumedBits(MULTIGRID_SYNC_ARG); } - if (funcRetrievesHostcallPtr(A)) { + if (funcRetrievesHostcallPtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr"); removeAssumedBits(HOSTCALL_PTR); } - if (funcRetrievesHeapPtr(A)) { + if (funcRetrievesHeapPtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr"); removeAssumedBits(HEAP_PTR); } - if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) { + if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) { assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr"); removeAssumedBits(QUEUE_PTR); } @@ -545,34 +547,33 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { return false; } - bool funcRetrievesMultigridSyncArg(Attributor &A) { - auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(); - AA::OffsetAndSize OAS(Pos, 8); - return funcRetrievesImplicitKernelArg(A, OAS); + bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV); + AA::RangeTy Range(Pos, 8); + return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesHostcallPtr(Attributor &A) { - auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); - AA::OffsetAndSize OAS(Pos, 8); - return funcRetrievesImplicitKernelArg(A, OAS); + bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) { + auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV); + AA::RangeTy Range(Pos, 8); + return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesHeapPtr(Attributor &A) { - if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) { + if (COV < 5) return false; - AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); - return funcRetrievesImplicitKernelArg(A, OAS); + AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); + return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesQueuePtr(Attributor &A) { - if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) + bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) { + if (COV < 5) return false; - AA::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); - return funcRetrievesImplicitKernelArg(A, OAS); + AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); + return funcRetrievesImplicitKernelArg(A, Range); } - bool funcRetrievesImplicitKernelArg(Attributor &A, - AA::OffsetAndSize OAS) { + bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) { // Check if this is a call to the implicitarg_ptr builtin and it // is used to retrieve the hostcall pointer. 
The implicit arg for // hostcall is not used only if every use of the implicitarg_ptr @@ -588,7 +589,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); return PointerInfoAA.forallInterferingAccesses( - OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) { + Range, [](const AAPointerInfo::Access &Acc, bool IsExact) { return Acc.getRemoteInst()->isDroppable(); }); }; @@ -616,121 +617,6 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, llvm_unreachable("AAAMDAttributes is only valid for function position"); } -/// Propagate amdgpu-flat-work-group-size attribute. -struct AAAMDFlatWorkGroupSize - : public StateWrapper { - using Base = StateWrapper; - AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) - : Base(IRP, 32) {} - - /// See AbstractAttribute::getState(...). - IntegerRangeState &getState() override { return *this; } - const IntegerRangeState &getState() const override { return *this; } - - void initialize(Attributor &A) override { - Function *F = getAssociatedFunction(); - auto &InfoCache = static_cast(A.getInfoCache()); - unsigned MinGroupSize, MaxGroupSize; - std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); - intersectKnown( - ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); - - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); - } - - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Change = ChangeStatus::UNCHANGED; - - auto CheckCallSite = [&](AbstractCallSite CS) { - Function *Caller = CS.getInstruction()->getFunction(); - LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() - << "->" << getAssociatedFunction()->getName() << '\n'); - - const auto &CallerInfo = A.getAAFor( - *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - - Change |= - clampStateAndIndicateChange(this->getState(), CallerInfo.getState()); - - return true; - }; - - bool AllCallSitesKnown = true; - if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) - return indicatePessimisticFixpoint(); - - return Change; - } - - ChangeStatus manifest(Attributor &A) override { - SmallVector AttrList; - Function *F = getAssociatedFunction(); - LLVMContext &Ctx = F->getContext(); - - auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); - - // Don't add the attribute if it's the implied default. - if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) - return ChangeStatus::UNCHANGED; - - SmallString<10> Buffer; - raw_svector_ostream OS(Buffer); - OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; - - AttrList.push_back( - Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); - return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, - /* ForceReplace */ true); - } - - const std::string getAsStr() const override { - std::string Str; - raw_string_ostream OS(Str); - OS << "AMDFlatWorkGroupSize["; - OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; - OS << ']'; - return OS.str(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} - - /// Create an abstract attribute view for the position \p IRP. 
- static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, - Attributor &A); - - /// See AbstractAttribute::getName() - const std::string getName() const override { - return "AAAMDFlatWorkGroupSize"; - } - - /// See AbstractAttribute::getIdAddr() - const char *getIdAddr() const override { return &ID; } - - /// This function should return true if the type of the \p AA is - /// AAAMDFlatWorkGroupSize - static bool classof(const AbstractAttribute *AA) { - return (AA->getIdAddr() == &ID); - } - - /// Unique ID (due to the unique address) - static const char ID; -}; - -const char AAAMDFlatWorkGroupSize::ID = 0; - -AAAMDFlatWorkGroupSize & -AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, - Attributor &A) { - if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) - return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); - llvm_unreachable( - "AAAMDFlatWorkGroupSize is only valid for function position"); -} - class AMDGPUAttributor : public ModulePass { public: AMDGPUAttributor() : ModulePass(ID) {} @@ -759,8 +645,8 @@ class AMDGPUAttributor : public ModulePass { AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, - &AAPointerInfo::ID}); + &AAPotentialValues::ID, &AACallEdges::ID, + &AAPointerInfo::ID, &AAPotentialConstantValues::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; @@ -773,9 +659,6 @@ class AMDGPUAttributor : public ModulePass { if (!F.isIntrinsic()) { A.getOrCreateAAFor(IRPosition::function(F)); A.getOrCreateAAFor(IRPosition::function(F)); - if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { - A.getOrCreateAAFor(IRPosition::function(F)); - } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index da145ed7563d5..0b943e3642e43 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -454,7 +454,9 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && + AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index c16d8ee51a7af..82504ea8b7c04 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -79,6 +79,7 @@ static bool hasSourceMods(const MachineInstr &MI) { switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::G_SELECT: case AMDGPU::G_FDIV: case AMDGPU::G_FREM: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index 5b7a97fd9fd35..93f59081f260e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -81,9 +81,12 @@ DIExprBuilder::Iterator AMDGPUFrameLowering::insertFrameLocation( Context, MF.getTarget().getPointerSizeInBits(AllocaAddrSpace)); ConstantData *WavefrontSizeLog2 = static_cast( ConstantInt::get(IntPtrTy, ST.getWavefrontSizeLog2(), false)); - std::initializer_list IL = { - DIOp::Referrer(IntPtrTy), 
DIOp::Constant(WavefrontSizeLog2), DIOp::Shr(), - DIOp::Reinterpret(PointerType::get(ResultType, AllocaAddrSpace)), - DIOp::Deref(ResultType)}; - return Builder.insert(BI, IL) + IL.size(); + + SmallVector FL = { DIOp::Referrer(IntPtrTy) }; + if (!ST.enableFlatScratch()) + FL.append({ DIOp::Constant(WavefrontSizeLog2), DIOp::Shr() }); + FL.append( + { DIOp::Reinterpret(PointerType::get(ResultType, AllocaAddrSpace)), + DIOp::Deref(ResultType) }); + return Builder.insert(BI, FL) + FL.size(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp index 0aa2c88ad41b0..d0763eef75402 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp @@ -38,7 +38,8 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, return std::make_pair(Def->getOperand(1).getReg(), Offset); // FIXME: matcher should ignore copies - if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset)))) + if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))) || + mi_match(Def->getOperand(2).getReg(), MRI, m_Pred_Copy(m_ICst(Offset)))) return std::make_pair(Def->getOperand(1).getReg(), Offset); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 5a5643c878515..f230e98e1a689 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -865,7 +865,8 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs( } msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( - const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const { + const MachineFunction &MF, const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const { const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); const Function &F = MF.getFunction(); @@ -879,7 +880,7 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps( Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); @@ -931,7 +932,8 @@ void MetadataStreamerMsgPackV3::end() { void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) { auto &Func = MF.getFunction(); - auto Kern = getHSAKernelProps(MF, ProgramInfo); + auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent()); + auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || Func.getCallingConv() == CallingConv::SPIR_KERNEL); @@ -1039,7 +1041,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( Offset += 8; // Skipped. } - if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { + if (MFI.hasHostcallPtr()) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); } else { @@ -1053,7 +1055,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( Offset += 8; // Skipped. } - if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) + if (MFI.hasHeapPtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); else Offset += 8; // Skipped. 
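A recurring change in the hunks above and below is that the code object version is no longer read from the process-wide AMDGPU::getAmdhsaCodeObjectVersion() but is looked up per module (e.g. AMDGPU::getCodeObjectVersion(*M)) and threaded through as an explicit argument, then compared against AMDGPU::AMDHSA_COV5. A minimal sketch of what such a per-module query could look like, assuming the version is carried in an "amdgpu_code_object_version" module flag scaled by 100 and defaulting to COV4 when absent (flag name, scaling and fallback are illustrative assumptions, not taken from this patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch only: plausible shape of a module-level code object version query.
static unsigned getCodeObjectVersionFromModule(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version")))
    return static_cast<unsigned>(Ver->getZExtValue()) / 100; // e.g. 500 -> 5
  return 4; // assumed default (COV4) when the flag is missing
}

Callers such as allocateHSAUserSGPRs or the HSA metadata streamer shown above would then compare the returned value against AMDGPU::AMDHSA_COV5 rather than consulting global state, which is what the signature changes in these hunks enable.
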
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 2d89692ac90e3..21caf69aaa00c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -81,7 +81,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer { msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const; msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, - const SIProgramInfo &ProgramInfo) const; + const SIProgramInfo &ProgramInfo, + unsigned CodeObjectVersion) const; void emitVersion() override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index ae0329f13b3aa..cb6b5a3950fe7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -533,6 +533,8 @@ bool PipelineSolver::solveExact() { if (checkOptimal()) return true; + if (static_cast(CurrSyncGroupIdx) == PipelineInstrs.size()) + if (static_cast(CurrSyncGroupIdx) == PipelineInstrs.size()) return false; @@ -771,44 +773,14 @@ void MFMASmallGemmOpt::applyIGLPStrategy( const unsigned PipelineSyncID = 0; SchedGroup *SG = nullptr; - for (unsigned I = 0; I < MFMACount; ++I) { - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - + for (unsigned I = 0; I < MFMACount * 3; ++I) { SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::VMEM_READ, 1, PipelineSyncID, DAG, TII); + SchedGroupMask::DS, 2, PipelineSyncID, DAG, TII); SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII); SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::VMEM_WRITE, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - } - - for (unsigned I = 0; I < MFMACount; ++I) { - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::VMEM_READ, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::VMEM_WRITE, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); - - SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( - SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII); - SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9ee559fa86254..ee902569c742a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -306,6 +306,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); + setOperationAction(ISD::FROUNDEVEN, {MVT::f16, MVT::f32, MVT::f64}, Custom); + setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); // Expand to fneg + fadd. 
@@ -1158,6 +1160,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); + case ISD::FROUNDEVEN: + return LowerFROUNDEVEN(Op, DAG); case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::FLOG: @@ -2150,6 +2154,13 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); } +SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op, + SelectionDAG &DAG) const { + auto VT = Op.getValueType(); + auto Arg = Op.getOperand(0u); + return DAG.getNode(ISD::FRINT, SDLoc(Op), VT, Arg); +} + // XXX - May require not supporting f32 denormals? // Don't handle v2f16. The extra instructions to scalarize and repack around the diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 87e8bf4c436a7..e0b5f5774fe3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -59,6 +59,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 0a68966935105..d9ecdc48da54f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -119,7 +119,7 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI, bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); - I.setDesc(TII.get(TargetOpcode::COPY)); + I.setDesc(TII.get(TII.getCopyOpcode())); const MachineOperand &Src = I.getOperand(1); MachineOperand &Dst = I.getOperand(0); @@ -244,8 +244,7 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, if (MO.isReg()) { unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); Register Reg = MO.getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) - .addReg(Reg, 0, ComposedSubIdx); + TII.buildCopy(*BB, MI, MI->getDebugLoc(), DstReg, Reg, 0, ComposedSubIdx); return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), MO.isKill(), MO.isDead(), MO.isUndef(), @@ -428,8 +427,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( Register Src1Reg = I.getOperand(3).getReg(); if (HasCarryIn) { - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) - .addReg(I.getOperand(4).getReg()); + TII.buildCopy(*BB, &I, DL, AMDGPU::SCC, I.getOperand(4).getReg()); } unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; @@ -438,8 +436,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? 
CarryOpc : NoCarryOpc), Dst0Reg) .add(I.getOperand(2)) .add(I.getOperand(3)); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) - .addReg(AMDGPU::SCC); + TII.buildCopy(*BB, &I, DL, Dst1Reg, AMDGPU::SCC); if (!MRI->getRegClassOrNull(Dst1Reg)) MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); @@ -515,8 +512,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I, *SrcRC, I.getOperand(1)); const DebugLoc &DL = I.getDebugLoc(); - BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg) - .addReg(SrcReg, 0, SubReg); + TII.buildCopy(*BB, &I, DL, DstReg, SrcReg, 0, SubReg); I.eraseFromParent(); return true; @@ -588,8 +584,7 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { ArrayRef SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); for (int I = 0, E = NumDst; I != E; ++I) { MachineOperand &Dst = MI.getOperand(I); - BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) - .addReg(SrcReg, 0, SubRegs[I]); + TII.buildCopy(*BB, &MI, DL, Dst.getReg(), SrcReg, 0, SubRegs[I]); // Make sure the subregister index is valid for the source register. SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]); @@ -675,7 +670,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const { // (build_vector $src0, undef) -> copy $src0 MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI); if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) { - MI.setDesc(TII.get(AMDGPU::COPY)); + MI.setDesc(TII.get(TII.getCopyOpcode())); MI.removeOperand(2); const auto &RC = IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; @@ -879,8 +874,7 @@ bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const { const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(M0Val); + TII.buildCopy(*MBB, &MI, DL, AMDGPU::M0, M0Val); BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov) .addImm(2) .addImm(MI.getOperand(4).getImm()) // $attr @@ -946,8 +940,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { // VALU. Constrain to a different SGPR to help avoid needing a nop later. RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI); - BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(LaneSelect); + TII.buildCopy(*MBB, *MIB, DL, AMDGPU::M0, LaneSelect); MIB.addReg(AMDGPU::M0); } } @@ -1159,8 +1152,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(2)) .add(I.getOperand(3)); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) - .addReg(AMDGPU::SCC); + TII.buildCopy(*BB, &I, DL, CCReg, AMDGPU::SCC); bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI); @@ -1242,12 +1234,12 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0); } else if (Value == -1) { // all ones Register SrcReg = Is64 ? 
AMDGPU::EXEC : AMDGPU::EXEC_LO; - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg); + TII.buildCopy(*BB, &I, DL, DstReg, SrcReg); } else return false; } else { Register SrcReg = I.getOperand(2).getReg(); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg); + TII.buildCopy(*BB, &I, DL, DstReg, SrcReg); } I.eraseFromParent(); @@ -1337,8 +1329,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { Register ReturnAddrReg = TRI.getReturnAddressReg(MF); Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg, AMDGPU::SReg_64RegClass, DL); - BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg) - .addReg(LiveIn); + TII.buildCopy(*MBB, &I, DL, DstReg, LiveIn); I.eraseFromParent(); return true; } @@ -1403,8 +1394,7 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic( unsigned Offset = Offset0 | (Offset1 << 8); Register M0Val = MI.getOperand(2).getReg(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(M0Val); + TII.buildCopy(*MBB, &MI, DL, AMDGPU::M0, M0Val); Register DstReg = MI.getOperand(0).getReg(); Register ValReg = MI.getOperand(3).getReg(); @@ -1505,8 +1495,7 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, .addReg(BaseOffset) .addImm(16); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(M0Base); + TII.buildCopy(*MBB, &MI, DL, AMDGPU::M0, M0Base); } // The resource id offset is computed as ( + M0[21:16] + @@ -1550,8 +1539,7 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI, const DebugLoc &DL = MI.getDebugLoc(); const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(PtrBase); + TII.buildCopy(*MBB, &MI, DL, AMDGPU::M0, PtrBase); if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI)) return false; @@ -1762,8 +1750,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( MIB.addDef(TmpReg); if (!MRI->use_empty(VDataOut)) { - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut) - .addReg(TmpReg, RegState::Kill, SubReg); + TII.buildCopy(*MBB, &MI, DL, VDataOut, TmpReg, RegState::Kill, SubReg); } } else { @@ -1933,8 +1920,7 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { if (!isVCC(CCReg, *MRI)) { unsigned SelectOpcode = Size == 64 ? 
AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; - MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) - .addReg(CCReg); + MachineInstr *CopySCC = TII.buildCopy(*BB, &I, DL, AMDGPU::SCC, CCReg); // The generic constrainSelectedInstRegOperands doesn't work for the scc register // bank, because it does not cover the register class that we used to represent @@ -2033,10 +2019,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { Register LoReg = MRI->createVirtualRegister(DstRC); Register HiReg = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg) - .addReg(SrcReg, 0, AMDGPU::sub0); - BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg) - .addReg(SrcReg, 0, AMDGPU::sub1); + TII.buildCopy(*MBB, I, DL, LoReg, SrcReg, 0, AMDGPU::sub0); + TII.buildCopy(*MBB, I, DL, HiReg, SrcReg, 0, AMDGPU::sub1); if (IsVALU && STI.hasSDWA()) { // Write the low 16-bits of the high element into the high 16-bits of the @@ -2106,7 +2090,7 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { I.getOperand(1).setSubReg(SubRegIdx); } - I.setDesc(TII.get(TargetOpcode::COPY)); + I.setDesc(TII.get(TII.getCopyOpcode())); return true; } @@ -2365,10 +2349,8 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const { Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg) - .addReg(Src, 0, AMDGPU::sub0); - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg) - .addReg(Src, 0, AMDGPU::sub1); + TII.buildCopy(*BB, &MI, DL, LoReg, Src, 0, AMDGPU::sub0); + TII.buildCopy(*BB, &MI, DL, HiReg, Src, 0, AMDGPU::sub1); BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg) .addImm(0x80000000); @@ -2406,10 +2388,8 @@ bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const { !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI)) return false; - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg) - .addReg(Src, 0, AMDGPU::sub0); - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg) - .addReg(Src, 0, AMDGPU::sub1); + TII.buildCopy(*BB, &MI, DL, LoReg, Src, 0, AMDGPU::sub0); + TII.buildCopy(*BB, &MI, DL, HiReg, Src, 0, AMDGPU::sub1); BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg) .addImm(0x7fffffff); @@ -2526,7 +2506,7 @@ static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) { MachineInstr &MI = *MRI.getUniqueVRegDef(Reg); const unsigned Opcode = MI.getOpcode(); - if (Opcode == AMDGPU::COPY) + if (MI.isCopy()) return isVCmpResult(MI.getOperand(1).getReg(), MRI); if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR || @@ -2587,8 +2567,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { if (!MRI->getRegClassOrNull(CondReg)) MRI->setRegClass(CondReg, ConstrainRC); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) - .addReg(CondReg); + TII.buildCopy(*BB, &I, DL, CondPhysReg, CondReg); BuildMI(*BB, &I, DL, TII.get(BrOpcode)) .addMBB(I.getOperand(1).getMBB()); @@ -2672,10 +2651,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { Register LoReg = MRI->createVirtualRegister(&RegRC); // Extract the subregisters from the source pointer. 
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg) - .addReg(SrcReg, 0, AMDGPU::sub0); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg) - .addReg(SrcReg, 0, AMDGPU::sub1); + TII.buildCopy(*BB, &I, DL, LoReg, SrcReg, 0, AMDGPU::sub0); + TII.buildCopy(*BB, &I, DL, HiReg, SrcReg, 0, AMDGPU::sub1); Register MaskedLo, MaskedHi; @@ -2687,8 +2664,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { Register MaskLo = MRI->createVirtualRegister(&RegRC); MaskedLo = MRI->createVirtualRegister(&RegRC); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo) - .addReg(MaskReg, 0, AMDGPU::sub0); + TII.buildCopy(*BB, &I, DL, MaskLo, MaskReg, 0, AMDGPU::sub0); BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo) .addReg(LoReg) .addReg(MaskLo); @@ -2701,8 +2677,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { Register MaskHi = MRI->createVirtualRegister(&RegRC); MaskedHi = MRI->createVirtualRegister(&RegRC); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi) - .addReg(MaskReg, 0, AMDGPU::sub1); + TII.buildCopy(*BB, &I, DL, MaskHi, MaskReg, 0, AMDGPU::sub1); BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi) .addReg(HiReg) .addReg(MaskHi); @@ -2785,8 +2760,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( if (DstTy.getSizeInBits() != 32 && !Is64) return false; - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(IdxReg); + TII.buildCopy(*BB, &MI, DL, AMDGPU::M0, IdxReg); unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32; BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg) @@ -2800,8 +2774,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( return false; if (!STI.useVGPRIndexMode()) { - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(IdxReg); + TII.buildCopy(*BB, &MI, DL, AMDGPU::M0, IdxReg); BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg) .addReg(SrcReg, 0, SubReg) .addReg(SrcReg, RegState::Implicit); @@ -2869,8 +2842,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( const DebugLoc &DL = MI.getDebugLoc(); if (!IndexMode) { - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .addReg(IdxReg); + TII.buildCopy(*BB, &MI, DL, AMDGPU::M0, IdxReg); const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo( VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID); @@ -2937,8 +2909,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .add(MI.getOperand(2)); + BuildMI(*MBB, &MI, DL, TII.get(TII.getCopyOpcode()), AMDGPU::M0) + .add(MI.getOperand(2)); auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)); @@ -3026,8 +2998,8 @@ bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) - .add(MI.getOperand(2)); + BuildMI(*MBB, &MI, DL, TII.get(TII.getCopyOpcode()), AMDGPU::M0) + .add(MI.getOperand(2)); Register Addr = MI.getOperand(1).getReg(); Register VOffset; @@ -3356,9 +3328,8 @@ std::pair AMDGPUInstructionSelector::selectVOP3ModsImpl( // we now have an SGPR register source. To avoid potentially violating the // constant bus restriction, we need to insert a copy to a VGPR. 
Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc); - BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), - TII.get(AMDGPU::COPY), VGPRSrc) - .addReg(Src); + TII.buildCopy(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), VGPRSrc, + Src); Src = VGPRSrc; } @@ -4150,7 +4121,9 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset( // FIXME: Copy check is a hack Register BasePtr; - if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) { + if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset)))) || + mi_match(Reg, *MRI, + m_GPtrAdd(m_Reg(BasePtr), m_Pred_Copy(m_ICst(Offset))))) { if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset)) return {}; const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 604d0b640f069..ebaf938f4d830 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1850,7 +1850,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); // For code object version 5, private_base and shared_base are passed through // implicit kernargs. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= + AMDGPU::AMDHSA_COV5) { AMDGPUTargetLowering::ImplicitParameter Param = AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE : AMDGPUTargetLowering::PRIVATE_BASE; @@ -5219,20 +5220,13 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return legalizeTrapEndpgm(MI, MRI, B); - if (Optional HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) { - switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - return legalizeTrapHsaQueuePtr(MI, MRI, B); - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - return ST.supportsGetDoorbellID() ? - legalizeTrapHsa(MI, MRI, B) : - legalizeTrapHsaQueuePtr(MI, MRI, B); - } - } + const Module *M = B.getMF().getFunction().getParent(); + unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); + if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) + return legalizeTrapHsaQueuePtr(MI, MRI, B); - llvm_unreachable("Unknown trap handler"); + return ST.supportsGetDoorbellID() ? + legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B); } bool AMDGPULegalizerInfo::legalizeTrapEndpgm( @@ -5249,7 +5243,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr( Register SGPR01(AMDGPU::SGPR0_SGPR1); // For code object version 5, queue_ptr is passed through implicit kernarg. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >= + AMDGPU::AMDHSA_COV5) { AMDGPUTargetLowering::ImplicitParameter Param = AMDGPUTargetLowering::QUEUE_PTR; uint64_t Offset = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 56e5e07084925..26074cf060714 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -322,7 +322,7 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { // TargetPassConfig for subtarget. 
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) { bool MadeChange = false; - bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5; + bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5; Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. @@ -354,7 +354,8 @@ ModulePass *llvm::createAMDGPULowerKernelAttributesPass() { PreservedAnalyses AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) { - bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5; + bool IsV5OrAbove = + AMDGPU::getCodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5; Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove); if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 0712466a0e88b..6ec4178053b20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -23,14 +23,6 @@ bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue( SIMachineFunctionInfo *MFI = MF.getInfo(); const AMDGPUTargetMachine &TM = static_cast(MF.getTarget()); - if (Src == "BufferResource") { - PSV = MFI->getBufferPSV(TM); - return false; - } - if (Src == "ImageResource") { - PSV = MFI->getImagePSV(TM); - return false; - } if (Src == "GWSResource") { PSV = MFI->getGWSPSV(TM); return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 488b3be1b5c9d..bff713c370268 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -16,17 +16,15 @@ using namespace llvm; -AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) - : IsEntryFunction(AMDGPU::isEntryFunctionCC( - MF.getFunction().getCallingConv())), +AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, + const AMDGPUSubtarget &ST) + : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), IsModuleEntryFunction( - AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())), - NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { - const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); + AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), + NoSignedZerosFPMath(false) { // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, // except reserved size is not correctly aligned. 
- const Function &F = MF.getFunction(); Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); MemoryBound = MemBoundAttr.getValueAsBool(); @@ -46,6 +44,11 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); + + // FIXME: Shouldn't be target specific + Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); + NoSignedZerosFPMath = + NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; } unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 9a481b5f697bf..5e4a08a799ff1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -20,6 +20,9 @@ namespace llvm { +class AMDGPUSubtarget; +class GCNSubtarget; + class AMDGPUMachineFunction : public MachineFunctionInfo { /// A map to keep track of local memory objects and their offsets within the /// local memory space. @@ -61,7 +64,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo { bool WaveLimiter = false; public: - AMDGPUMachineFunction(const MachineFunction &MF); + AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); uint64_t getExplicitKernArgSize() const { return ExplicitKernArgSize; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index a9f1e9bd09963..94cd6047290b2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -116,6 +116,7 @@ struct AMDGPUPerfHint { bool isGlobalAddr(const Value *V) const; bool isLocalAddr(const Value *V) const; + bool isConstantAddr(const Value *V) const; bool isGlobalLoadUsedInBB(const Instruction &) const; }; @@ -153,7 +154,7 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { if (auto LD = dyn_cast(V)) { auto M = LD->getPointerOperand(); - if (isGlobalAddr(M)) { + if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) { LLVM_DEBUG(dbgs() << " is IA\n"); return true; } @@ -348,6 +349,15 @@ bool AMDGPUPerfHint::isLocalAddr(const Value *V) const { return false; } +bool AMDGPUPerfHint::isConstantAddr(const Value *V) const { + if (auto PT = dyn_cast(V->getType())) { + unsigned As = PT->getAddressSpace(); + return As == AMDGPUAS::CONSTANT_ADDRESS || + As == AMDGPUAS::CONSTANT_ADDRESS_32BIT; + } + return false; +} + bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) { LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n'); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index dafbeaeaec528..cfa4492f616e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -55,7 +55,10 @@ static constexpr const FeatureBitset TargetFeatures = { // Attributes to propagate. // TODO: Support conservative min/max merging instead of cloning. 
-static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"}; +static constexpr const char* AttributeNames[] = { + "amdgpu-waves-per-eu", + "amdgpu-flat-work-group-size" +}; static constexpr unsigned NumAttr = sizeof(AttributeNames) / sizeof(AttributeNames[0]); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index b3671eee3553b..7b29483e9bf90 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -94,7 +94,7 @@ Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) { // Search for existing copy of Reg to vgpr. for (MachineInstr &Use : MRI.use_instructions(Reg)) { Register Def = Use.getOperand(0).getReg(); - if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def)) + if (Use.isCopy() && isVgprRegBank(Def)) return Def; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5d13321e9eb08..0042e30650637 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3115,7 +3115,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( while (Start->getOpcode() != FrameSetupOpcode) { --Start; bool IsCopy = false; - if (Start->getOpcode() == AMDGPU::COPY) { + if (Start->isCopy()) { auto &Dst = Start->getOperand(0); if (Dst.isReg()) { Register Reg = Dst.getReg(); @@ -3155,7 +3155,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( while (End->getOpcode() != FrameDestroyOpcode) { ++End; bool IsCopy = false; - if (End->getOpcode() == AMDGPU::COPY) { + if (End->isCopy()) { auto &Src = End->getOperand(1); if (Src.isReg()) { Register Reg = Src.getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp new file mode 100644 index 0000000000000..c5ca2ef2b2490 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp @@ -0,0 +1,189 @@ +//===-- AMDGPURemoveIncompatibleFunctions.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass replaces all uses of functions that use GPU features +/// incompatible with the current GPU with null then deletes the function. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "amdgpu-remove-incompatible-functions" + +using namespace llvm; + +namespace llvm { +extern const SubtargetFeatureKV + AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1]; +} + +namespace { + +using Generation = AMDGPUSubtarget::Generation; + +class AMDGPURemoveIncompatibleFunctions : public ModulePass { +public: + static char ID; + + AMDGPURemoveIncompatibleFunctions(const TargetMachine *TM = nullptr) + : ModulePass(ID), TM(TM) { + assert(TM && "No TargetMachine!"); + } + + StringRef getPassName() const override { + return "AMDGPU Remove Incompatible Functions"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override {} + + /// Checks a single function, returns true if the function must be deleted. + bool checkFunction(Function &F); + + bool runOnModule(Module &M) override { + assert(TM->getTargetTriple().isAMDGCN()); + + SmallVector FnsToDelete; + for (Function &F : M) { + if (checkFunction(F)) + FnsToDelete.push_back(&F); + } + + for (Function *F : FnsToDelete) { + F->replaceAllUsesWith(ConstantPointerNull::get(F->getType())); + F->eraseFromParent(); + } + return !FnsToDelete.empty(); + } + +private: + const TargetMachine *TM = nullptr; +}; + +StringRef getFeatureName(unsigned Feature) { + for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) + if (Feature == KV.Value) + return KV.Key; + + llvm_unreachable("Unknown Target feature"); +} + +const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST, + StringRef GPUName) { + for (const SubtargetSubTypeKV &KV : ST.getAllProcessorDescriptions()) + if (StringRef(KV.Key) == GPUName) + return &KV; + + return nullptr; +} + +constexpr unsigned FeaturesToCheck[] = { + AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts, + AMDGPU::FeatureGFX9Insts, AMDGPU::FeatureGFX8Insts, + AMDGPU::FeatureDPP, AMDGPU::Feature16BitInsts, + AMDGPU::FeatureDot1Insts, AMDGPU::FeatureDot2Insts, + AMDGPU::FeatureDot3Insts, AMDGPU::FeatureDot4Insts, + AMDGPU::FeatureDot5Insts, AMDGPU::FeatureDot6Insts, + AMDGPU::FeatureDot7Insts, AMDGPU::FeatureDot8Insts, +}; + +FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) { + FeatureBitset Result = Features; + for (const SubtargetFeatureKV &FE : AMDGPUFeatureKV) { + if (Features.test(FE.Value) && FE.Implies.any()) + Result |= expandImpliedFeatures(FE.Implies.getAsBitset()); + } + return Result; +} + +static int DK_IncompatibleFn = getNextAvailablePluginDiagnosticKind(); + +struct DiagnosticInfoRemovingIncompatibleFunction + : public DiagnosticInfoWithLocationBase { + DiagnosticInfoRemovingIncompatibleFunction(Function &F, Twine M) + : DiagnosticInfoWithLocationBase(DiagnosticKind(DK_IncompatibleFn), + DS_Remark, F, DiagnosticLocation()), + Msg(M.str()) {} + + void print(DiagnosticPrinter &DP) const override { + DP << getFunction().getName() << ": removing function: " << Msg; + } + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_IncompatibleFn; + } + + std::string Msg; +}; + +} // end anonymous namespace + +bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) { + if (F.isDeclaration()) + return false; + + const GCNSubtarget *ST = + static_cast(TM->getSubtargetImpl(F)); + + // 
Check the GPU isn't generic. Generic is used for testing only + // and we don't want this pass to interfere with it. + StringRef GPUName = ST->getCPU(); + if (GPUName.empty() || GPUName.contains("generic")) + return false; + + // Try to fetch the GPU's info. If we can't, it's likely an unknown processor + // so just bail out. + const SubtargetSubTypeKV *GPUInfo = getGPUInfo(*ST, GPUName); + if (!GPUInfo) + return false; + + LLVMContext &Ctx = F.getContext(); + + // Get all the features implied by the current GPU, and recursively expand + // the features that imply other features. + // + // e.g. GFX90A implies FeatureGFX9, and FeatureGFX9 implies a whole set of + // other features. + const FeatureBitset GPUFeatureBits = + expandImpliedFeatures(GPUInfo->Implies.getAsBitset()); + + // Now that the have a FeatureBitset containing all possible features for + // the chosen GPU, check our list of "suspicious" features. + + // Check that the user didn't enable any features that aren't part of that + // GPU's feature set. We only check a predetermined set of features. + for (unsigned Feature : FeaturesToCheck) { + if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) { + DiagnosticInfoRemovingIncompatibleFunction DiagInfo( + F, "+" + getFeatureName(Feature) + + " is not supported on the current target"); + Ctx.diagnose(DiagInfo); + return true; + } + } + + return false; +} + +INITIALIZE_PASS(AMDGPURemoveIncompatibleFunctions, DEBUG_TYPE, + "AMDGPU Remove Incompatible Functions", false, false) + +char AMDGPURemoveIncompatibleFunctions::ID = 0; + +ModulePass * +llvm::createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *TM) { + return new AMDGPURemoveIncompatibleFunctions(TM); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index ede2b2b671c17..a1067d77bc059 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -111,7 +111,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) { // By default, for code object v5 and later, track only the minimum scratch // size - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) { + if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5) { if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences()) AssumedStackSizeForDynamicSizeObjects = 0; if (!AssumedStackSizeForExternalCall.getNumOccurrences()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index e33f5d079915b..d3c7620afdb54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -523,7 +523,9 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const { return 16; // Assume all implicit inputs are used by default - unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56; + const Module *M = F.getParent(); + unsigned NBytes = + AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 
256 : 56; return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 55a450b5a1f7f..1a87c4a329276 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -26,6 +26,7 @@ #include "GCNSchedStrategy.h" #include "GCNVOPDUtils.h" #include "R600.h" +#include "R600MachineFunctionInfo.h" #include "R600TargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" @@ -216,6 +217,12 @@ static cl::opt EarlyInlineAll( cl::init(false), cl::Hidden); +static cl::opt RemoveIncompatibleFunctions( + "amdgpu-enable-remove-incompatible-functions", cl::Hidden, + cl::desc("Enable removal of functions when they" + "use features not supported by the target GPU"), + cl::init(true)); + static cl::opt EnableSDWAPeephole( "amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), @@ -351,10 +358,10 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSILowerI1CopiesPass(*PR); initializeSILowerSGPRSpillsPass(*PR); initializeSIFixSGPRCopiesPass(*PR); - initializeSIFixVGPRCopiesPass(*PR); initializeSIFoldOperandsPass(*PR); initializeSIPeepholeSDWAPass(*PR); initializeSIShrinkInstructionsPass(*PR); + initializeSISimplifyPredicatedCopiesPass(*PR); initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); initializeSILoadStoreOptimizerPass(*PR); @@ -379,6 +386,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPULateCodeGenPreparePass(*PR); initializeAMDGPUPropagateAttributesEarlyPass(*PR); initializeAMDGPUPropagateAttributesLatePass(*PR); + initializeAMDGPURemoveIncompatibleFunctionsPass(*PR); initializeAMDGPUReplaceLDSUseWithPointerPass(*PR); initializeAMDGPULowerModuleLDSPass(*PR); initializeAMDGPURewriteOutArgumentsPass(*PR); @@ -880,7 +888,6 @@ AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const { case PseudoSourceValue::JumpTable: case PseudoSourceValue::GlobalValueCallEntry: case PseudoSourceValue::ExternalSymbolCallEntry: - case PseudoSourceValue::TargetCustom: return AMDGPUAS::CONSTANT_ADDRESS; } return AMDGPUAS::FLAT_ADDRESS; @@ -1128,6 +1135,9 @@ void AMDGPUPassConfig::addIRPasses() { void AMDGPUPassConfig::addCodeGenPrepare() { if (TM->getTargetTriple().getArch() == Triple::amdgcn) { + if (RemoveIncompatibleFunctions) + addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM)); + addPass(createAMDGPUAttributorPass()); // FIXME: This pass adds 2 hacky attributes that can be replaced with an @@ -1177,6 +1187,13 @@ AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const { return DAG; } +MachineFunctionInfo *R600TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return R600MachineFunctionInfo::create( + Allocator, F, static_cast(STI)); +} + //===----------------------------------------------------------------------===// // GCN Pass Setup //===----------------------------------------------------------------------===// @@ -1365,6 +1382,8 @@ void GCNPassConfig::addOptimizedRegAlloc() { bool GCNPassConfig::addPreRewrite() { if (EnableRegReassign) addPass(&GCNNSAReassignID); + + addPass(&SISimplifyPredicatedCopiesID); return true; } @@ -1415,6 +1434,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { addPass(&SILowerSGPRSpillsID); addPass(createVGPRAllocPass(false)); + 
addPass(&SISimplifyPredicatedCopiesID); return true; } @@ -1442,7 +1462,6 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { } void GCNPassConfig::addPostRegAlloc() { - addPass(&SIFixVGPRCopiesID); if (getOptLevel() > CodeGenOpt::None) addPass(&SIOptimizeExecMaskingID); TargetPassConfig::addPostRegAlloc(); @@ -1493,6 +1512,19 @@ TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(*this, PM); } +void GCNTargetMachine::registerMachineRegisterInfoCallback( + MachineFunction &MF) const { + SIMachineFunctionInfo *MFI = MF.getInfo(); + MF.getRegInfo().addDelegate(MFI); +} + +MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return SIMachineFunctionInfo::create( + Allocator, F, static_cast(STI)); +} + yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const { return new yaml::SIMachineFunctionInfo(); } @@ -1501,7 +1533,7 @@ yaml::MachineFunctionInfo * GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { const SIMachineFunctionInfo *MFI = MF.getInfo(); return new yaml::SIMachineFunctionInfo( - *MFI, *MF.getSubtarget().getRegisterInfo(), MF); + *MFI, *MF.getSubtarget().getRegisterInfo(), MF); } bool GCNTargetMachine::parseMachineFunctionInfo( @@ -1540,6 +1572,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy)) return true; + if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy)) + return true; + auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) { // Create a diagnostic for a the register string literal. const MemoryBuffer &Buffer = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 207586e0ac0ce..aafb127c405e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -96,6 +96,12 @@ class GCNTargetMachine final : public AMDGPUTargetMachine { return true; } + void registerMachineRegisterInfoCallback(MachineFunction &MF) const override; + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c32c56b1c8f32..eb1b07115abfc 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5372,7 +5372,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, - ReserveFlatScr); + ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); return false; } @@ -8102,7 +8102,9 @@ void AMDGPUAsmParser::onBeginOfFile() { return; if (!getTargetStreamer().getTargetID()) - getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); + getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), + // TODO: Should try to check code object version from directive??? 
+ AMDGPU::getAmdhsaCodeObjectVersion()); if (isHsaAbiVersion3AndAbove(&getSTI())) getTargetStreamer().EmitDirectiveAMDGCNTarget(); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 3016bbc19b330..5940698784048 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -92,6 +92,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPURegBankCombiner.cpp AMDGPURegisterBankInfo.cpp AMDGPUReleaseVGPRs.cpp + AMDGPURemoveIncompatibleFunctions.cpp AMDGPUReplaceLDSUseWithPointer.cpp AMDGPUResourceUsageAnalysis.cpp AMDGPURewriteOutArguments.cpp @@ -106,6 +107,7 @@ add_llvm_target(AMDGPUCodeGen R600MachineCFGStructurizer.cpp GCNCreateVOPD.cpp GCNDPPCombine.cpp + AMDGPUResourceUsageAnalysis.cpp GCNHazardRecognizer.cpp GCNILPSched.cpp GCNIterativeScheduler.cpp @@ -136,7 +138,6 @@ add_llvm_target(AMDGPUCodeGen R600TargetTransformInfo.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp - SIFixVGPRCopies.cpp SIFoldOperands.cpp SIFormMemoryClauses.cpp SIFrameLowering.cpp @@ -163,7 +164,9 @@ add_llvm_target(AMDGPUCodeGen SIProgramInfo.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp + SISimplifyPredicatedCopies.cpp SIWholeQuadMode.cpp + GCNPreRAOptimizations.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 43a1dfc7f561b..66413a8855cfc 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2089,7 +2089,7 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5) PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index de245ef57def7..7718058ba3544 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -178,6 +178,7 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const { case AMDGPU::IMPLICIT_DEF: return nullptr; case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B64_PSEUDO: case AMDGPU::V_MOV_B64_e32: diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 81013db1f0034..538d1fbc40d88 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2495,6 +2495,22 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } } + // Workaround for HW data hazard bug observed only in GFX90A. When there + // is a DGEMM instruction in-between a VALU and a VMEM instruction it + // causes the SQ to incorrectly not insert two wait states between the two + // instructions needed to avoid data hazard. 
+ if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) { + DGEMMAfterVALUWrite = false; + if (TRI.isVectorRegister(MRI, Reg)) { + int WaitStatesNeededForUse = + DMFMABetweenVALUWriteVMEMRead - + getWaitStatesSinceDef(Reg, IsDGEMMHazard, + DMFMABetweenVALUWriteVMEMRead); + + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); + } + } + MFMA = nullptr; WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp index a906a4207758f..8ecb2068e6330 100644 --- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp +++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp @@ -96,7 +96,8 @@ bool GCNPreRAOptimizations::processReg(Register Reg) { return false; case AMDGPU::V_ACCVGPR_WRITE_B32_e64: break; - case AMDGPU::COPY: { + case AMDGPU::COPY: + case AMDGPU::PRED_COPY: { // Some subtargets cannot do an AGPR to AGPR copy directly, and need an // intermdiate temporary VGPR register. Try to find the defining // accvgpr_write to avoid temporary registers. diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 1577c1761aadd..845feb097dd9f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -740,6 +740,7 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() { } GCNSchedStage::finalizeGCNSchedStage(); + } bool GCNSchedStage::initGCNRegion() { @@ -753,7 +754,7 @@ bool GCNSchedStage::initGCNRegion() { // Skip empty scheduling regions (0 or 1 schedulable instructions). if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end())) return false; - + LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB) << " " << CurrentMBB->getName() @@ -761,7 +762,6 @@ bool GCNSchedStage::initGCNRegion() { if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); - // Save original instruction order before scheduling for possible revert. Unsched.clear(); Unsched.reserve(DAG.NumRegionInstrs); @@ -777,9 +777,7 @@ bool GCNSchedStage::initGCNRegion() { for (auto &I : DAG) Unsched.push_back(&I); } - PressureBefore = DAG.Pressure[RegionIdx]; - LLVM_DEBUG( dbgs() << "Pressure before scheduling:\nRegion live-ins:" << print(DAG.LiveIns[RegionIdx], DAG.MRI) @@ -795,7 +793,6 @@ bool GCNSchedStage::initGCNRegion() { SavedMutations.swap(DAG.Mutations); DAG.addMutation(createIGroupLPDAGMutation()); } - return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 2249138c7075a..067709c7ea0f9 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -74,7 +74,6 @@ class GCNSchedStrategy : public GenericScheduler { // schedule() have seen register pressure over the critical limits and had to // track register pressure for actual scheduling heuristics. bool HasHighPressure; - // An error margin is necessary because of poor performance of the generic RP // tracker and can be adjusted up for tuning heuristics to try and more // aggressively reduce register pressure. 
@@ -83,7 +82,6 @@ class GCNSchedStrategy : public GenericScheduler { const unsigned HighRPErrorMargin = 10; unsigned ErrorMargin = DefaultErrorMargin; - unsigned SGPRCriticalLimit; unsigned VGPRCriticalLimit; @@ -341,11 +339,8 @@ class PreRARematStage : public GCNSchedStage { public: bool initGCNSchedStage() override; - bool initGCNRegion() override; - bool shouldRevertScheduling(unsigned WavesAfter) override; - PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; @@ -361,19 +356,14 @@ class ILPInitialScheduleStage : public GCNSchedStage { class GCNPostScheduleDAGMILive final : public ScheduleDAGMI { private: std::vector> SavedMutations; - bool HasIGLPInstrs = false; - public: void schedule() override; - void finalizeSchedule() override; - GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S, bool RemoveKillFlags); }; - } // End namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 8079490b29ca0..648f8d6931d0e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -71,7 +71,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // Dynamically set bits that enable features. bool FlatForGlobal = false; bool AutoWaitcntBeforeBarrier = false; - bool BackOffBarrier = false; bool UnalignedScratchAccess = false; bool UnalignedAccessMode = false; bool HasApertureRegs = false; @@ -509,12 +508,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return AutoWaitcntBeforeBarrier; } - /// \returns true if the target supports backing off of s_barrier instructions - /// when an exception is raised. - bool supportsBackOffBarrier() const { - return BackOffBarrier; - } - bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 1c586a68a9288..bcf5ba1f43827 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -320,7 +320,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, - bool ReserveVCC, bool ReserveFlatScr) { + bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -367,7 +367,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); - if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) + if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); PRINT_FIELD(OS, @@ -407,19 +407,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI)) OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n'; - if (Optional HsaAbiVer = getHsaAbiVersion(&STI)) { - switch (*HsaAbiVer) { - default: - break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - case 
ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - if (getTargetID()->isXnackSupported()) - OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; - break; - } + switch (CodeObjectVersion) { + default: + break; + case AMDGPU::AMDHSA_COV2: + break; + case AMDGPU::AMDHSA_COV3: + case AMDGPU::AMDHSA_COV4: + case AMDGPU::AMDHSA_COV5: + if (getTargetID()->isXnackSupported()) + OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; + break; } PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, @@ -850,7 +848,8 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index a857fd00a8555..ad3f36a4dc29f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -89,7 +89,8 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) = 0; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) = 0; static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); @@ -100,12 +101,15 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { Optional &getTargetID() { return TargetID; } - void initializeTargetID(const MCSubtargetInfo &STI) { + void initializeTargetID(const MCSubtargetInfo &STI, + unsigned CodeObjectVersion) { assert(TargetID == None && "TargetID can only be initialized once"); TargetID.emplace(STI); + getTargetID()->setCodeObjectVersion(CodeObjectVersion); } - void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { - initializeTargetID(STI); + void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString, + unsigned CodeObjectVersion) { + initializeTargetID(STI, CodeObjectVersion); assert(getTargetID() != None && "TargetID is None"); getTargetID()->setTargetIDFromFeaturesString(FeatureString); @@ -149,7 +153,8 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -209,7 +214,8 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, + unsigned CodeObjectVersion) 
override; }; } diff --git a/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp index 65011a9eadf83..4254cd3c2008a 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" using namespace llvm; -R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : AMDGPUMachineFunction(MF) { } +R600MachineFunctionInfo::R600MachineFunctionInfo(const Function &F, + const R600Subtarget *STI) + : AMDGPUMachineFunction(F, *STI) {} diff --git a/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h index 6a5ac9023329a..a8abf56f52baf 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -16,9 +16,11 @@ namespace llvm { +class R600Subtarget; + class R600MachineFunctionInfo final : public AMDGPUMachineFunction { public: - R600MachineFunctionInfo(const MachineFunction &MF); + R600MachineFunctionInfo(const Function &F, const R600Subtarget *STI); unsigned CFStackSize; }; diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h index 8d20841292b9a..db3f9bf77016f 100644 --- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h @@ -41,6 +41,10 @@ class R600TargetMachine final : public AMDGPUTargetMachine { TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool isMachineVerifierClean() const override { return false; } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 85930312352b8..6614aecb74367 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -905,6 +905,16 @@ enum Offset_COV5 : unsigned { }; } // namespace ImplicitArg + +namespace VirtRegFlag { +// Virtual register flags used for various target specific handlings during +// codegen. +enum Register_Flag : uint8_t { + WWM_REG = 0 // Register operand in a whole-wave mode operation. +}; + +} // namespace VirtRegFlag + } // namespace AMDGPU #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 01f5fb2081719..cc71e7a9c734d 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -311,15 +311,16 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, Register TmpReg = MRI.createVirtualRegister(NewSrcRC); - BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), - TmpReg) + BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), + TII->get(TII->getCopyOpcode()), TmpReg) .add(MI.getOperand(I)); if (IsAGPR) { const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC); Register TmpAReg = MRI.createVirtualRegister(NewSrcRC); - unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ? - AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY; + unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass + ? 
AMDGPU::V_ACCVGPR_WRITE_B32_e64 + : TII->getCopyOpcode(); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc), TmpAReg) .addReg(TmpReg, RegState::Kill); @@ -338,7 +339,7 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm) { - if (Copy->getOpcode() != AMDGPU::COPY) + if (!Copy->isCopy()) return false; if (!MoveImm->isMoveImmediate()) @@ -618,6 +619,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { default: continue; case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::WQM: case AMDGPU::STRICT_WQM: case AMDGPU::SOFT_WQM: @@ -662,11 +664,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { : MBB; MachineBasicBlock::iterator PointToInsertCopy = MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I; - MachineInstr *NewCopy = - BuildMI(*BlockToInsertCopy, PointToInsertCopy, - PointToInsertCopy->getDebugLoc(), - TII->get(AMDGPU::COPY), NewDst) - .addReg(MO.getReg()); + MachineInstr *NewCopy = TII->buildCopy( + *BlockToInsertCopy, PointToInsertCopy, + PointToInsertCopy->getDebugLoc(), NewDst, MO.getReg()); MO.setReg(NewDst); analyzeVGPRToSGPRCopy(NewCopy); } @@ -734,7 +734,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { // Haven't managed to resolve by replacing an SGPR with an immediate // Move src1 to be in M0 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - TII->get(AMDGPU::COPY), AMDGPU::M0) + TII->get(TII->getCopyOpcode()), AMDGPU::M0) .add(Src1); Src1.ChangeToRegister(AMDGPU::M0, false); } @@ -1064,9 +1064,8 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) { SCCCopy) .addImm(-1) .addImm(0); - I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(), - TII->get(AMDGPU::COPY), DstReg) - .addReg(SCCCopy); + I = TII->buildCopy(*MI.getParent(), std::next(I), I->getDebugLoc(), + DstReg, SCCCopy); MI.eraseFromParent(); continue; } diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp deleted file mode 100644 index f7e3ea5fc0723..0000000000000 --- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ /dev/null @@ -1,71 +0,0 @@ -//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Add implicit use of exec to vector register copies. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-fix-vgpr-copies" - -namespace { - -class SIFixVGPRCopies : public MachineFunctionPass { -public: - static char ID; - -public: - SIFixVGPRCopies() : MachineFunctionPass(ID) { - initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return "SI Fix VGPR copies"; } -}; - -} // End anonymous namespace. 
- -INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false) - -char SIFixVGPRCopies::ID = 0; - -char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; - -bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - switch (MI.getOpcode()) { - case AMDGPU::COPY: - if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { - MI.addOperand(MF, - MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); - LLVM_DEBUG(dbgs() << "Add exec use to " << MI); - Changed = true; - } - break; - default: - break; - } - } - } - - return Changed; -} diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4897f481bf3c9..fad86d5edbd5e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -269,9 +269,8 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MachineInstr *Inst32 = TII->buildShrunkInst(*MI, Op32); if (HaveNonDbgCarryUse) { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::COPY), - Dst1.getReg()) - .addReg(AMDGPU::VCC, RegState::Kill); + TII->buildCopy(*MBB, MI, MI->getDebugLoc(), Dst1.getReg(), AMDGPU::VCC, + RegState::Kill); } // Keep the old instruction around to avoid breaking iterators, but @@ -718,7 +717,7 @@ void SIFoldOperands::foldOperand( // copy to a MOV. unsigned MovOp = TII->getMovOpcode(DestRC); - if (MovOp == AMDGPU::COPY) + if (MovOp == AMDGPU::COPY || MovOp == AMDGPU::PRED_COPY) return; UseMI->setDesc(TII->get(MovOp)); @@ -798,7 +797,8 @@ void SIFoldOperands::foldOperand( CopyToVGPR = Src; } else { auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def); + BuildMI(MBB, UseMI, DL, TII->get(TII->getCopyOpcode()), Tmp) + .add(*Def); B.addReg(Tmp); } } @@ -809,7 +809,8 @@ void SIFoldOperands::foldOperand( Vgpr = VGPRCopies[CopyToVGPR]; } else { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); + BuildMI(MBB, UseMI, DL, TII->get(TII->getCopyOpcode()), Vgpr) + .add(*Def); VGPRCopies[CopyToVGPR] = Vgpr; } auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); @@ -876,7 +877,7 @@ void SIFoldOperands::foldOperand( // %sgpr1 = V_READFIRSTLANE_B32 %vgpr // => // %sgpr1 = COPY %sgpr0 - UseMI->setDesc(TII->get(AMDGPU::COPY)); + UseMI->setDesc(TII->get(TII->getCopyOpcode())); UseMI->getOperand(1).setReg(OpToFold.getReg()); UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); UseMI->getOperand(1).setIsKill(false); @@ -1114,7 +1115,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { if (Src1Val == 0) { // y = or x, 0 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); } else if (Src1Val == -1) { // y = or x, -1 => y = v_mov_b32 -1 MI->removeOperand(Src1Idx); @@ -1135,7 +1136,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { } else if (Src1Val == -1) { // y = and x, -1 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); stripExtraCopyOperands(*MI); } else return false; @@ -1149,7 +1150,7 @@ bool 
SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { if (Src1Val == 0) { // y = xor x, 0 => y = copy x MI->removeOperand(Src1Idx); - mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); + mutateCopyOp(*MI, TII->get(TII->getCopyOpcode())); return true; } } @@ -1183,7 +1184,7 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const { LLVM_DEBUG(dbgs() << "Folded " << MI << " into "); auto &NewDesc = - TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false)); + TII->get(Src0->isReg() ? TII->getCopyOpcode() : getMovOpc(false)); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx != -1) MI.removeOperand(Src2Idx); @@ -1639,9 +1640,8 @@ bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) { PHI.getOperand(0).setReg(NewReg); MachineBasicBlock *MBB = PHI.getParent(); - BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(), - TII->get(AMDGPU::COPY), PhiOut) - .addReg(NewReg, RegState::Kill); + TII->buildCopy(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(), PhiOut, + NewReg, RegState::Kill); Copy->eraseFromParent(); // We know this copy had a single use. LLVM_DEBUG(dbgs() << "Folded " << PHI); @@ -1678,6 +1678,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) { if (!I->isCopy() && !I->isRegSequence()) return false; Register DstReg = I->getOperand(0).getReg(); + // Physical registers may have more than one instruction definitions + if (DstReg.isPhysical()) + return false; if (TRI->isAGPR(*MRI, DstReg)) continue; MoveRegs.push_back(DstReg); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 8b0f7614c2c47..fe462a556651e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -30,6 +30,85 @@ static cl::opt EnableSpillVGPRToAGPR( cl::ReallyHidden, cl::init(true)); +static constexpr unsigned SGPRBitSize = 32; +static constexpr unsigned SGPRByteSize = SGPRBitSize / 8; +static constexpr unsigned VGPRLaneBitSize = 32; +// FIXME: should be replaced by a constant defined elsewhere +static constexpr unsigned DW_ASPACE_AMDGPU_private_wave = 6; + +// Find a scratch register matching \p RC which is unused and available +// throughout the function. On failure, returns a null register. +static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, + LivePhysRegs &LiveRegs, + const TargetRegisterClass &RC) { + for (MCRegister Reg : RC) { + if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) + return Reg; + } + return MCRegister(); +} + +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + assert(DwarfReg >= 0); + if (DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + +static MCCFIInstruction +createScaledCFAInPrivateWave(const GCNSubtarget &ST, + MCRegister DwarfStackPtrReg) { + assert(ST.enableFlatScratch()); + + // When flat scratch is used, the cfa is expressed in terms of private_lane + // (address space 5), but the debugger only accepts addresses in terms of + // private_wave (6). 
Override the cfa value using the expression + // (wave_size*cfa_reg), which is equivalent to (cfa_reg << wave_size_log2) + const unsigned WavefrontSizeLog2 = ST.getWavefrontSizeLog2(); + assert(WavefrontSizeLog2 < 32); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(DwarfStackPtrReg, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size) << uint8_t(4) + << uint8_t(dwarf::DW_OP_lit0 + WavefrontSizeLog2) + << uint8_t(dwarf::DW_OP_shl) + << uint8_t(dwarf::DW_OP_lit0 + DW_ASPACE_AMDGPU_private_wave) + << uint8_t(dwarf::DW_OP_LLVM_form_aspace_address); + + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + OSCFIInst << uint8_t(dwarf::DW_CFA_def_cfa_expression); + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + return MCCFIInstruction::createEscape(nullptr, OSCFIInst.str()); +} + +void SIFrameLowering::emitDefCFA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags) const { + MachineFunction &MF = *MBB.getParent(); + const GCNSubtarget &ST = MF.getSubtarget(); + const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); + + MCRegister DwarfStackPtrReg = MCRI->getDwarfRegNum(StackPtrReg, false); + MCCFIInstruction CFIInst = + ST.enableFlatScratch() + ? createScaledCFAInPrivateWave(ST, DwarfStackPtrReg) + : (AspaceAlreadyDefined ? MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, DwarfStackPtrReg, 0, + DW_ASPACE_AMDGPU_private_wave) + : MCCFIInstruction::createDefCfaRegister( + nullptr, DwarfStackPtrReg)); + buildCFI(MBB, MBBI, DL, CFIInst, Flags); +} + // Find a scratch register that we can use in the prologue. We avoid using // callee-save registers since they may appear to be free when this is called // from canUseAsPrologue (during shrink wrapping), but then no longer be free @@ -43,82 +122,74 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); - if (Unused) { - // We are looking for a register that can be used throughout the entire - // function, so any use is unacceptable. - for (MCRegister Reg : RC) { - if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) - return Reg; - } - } else { - for (MCRegister Reg : RC) { - if (LiveRegs.available(MRI, Reg)) - return Reg; - } + // We are looking for a register that can be used throughout the entire + // function, so any use is unacceptable. + if (Unused) + return findUnusedRegister(MRI, LiveRegs, RC); + + for (MCRegister Reg : RC) { + if (LiveRegs.available(MRI, Reg)) + return Reg; } return MCRegister(); } -static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, - LivePhysRegs &LiveRegs, - Register &TempSGPR, - Optional &FrameIndex, - bool IsFP) { +static void getVGPRSpillLaneOrTempRegister( + MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR, + const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass, + bool IncludeScratchCopy = true) { SIMachineFunctionInfo *MFI = MF.getInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - - // We need to save and restore the current FP/BP. - - // 1: If there is already a VGPR with free lanes, use it. We - // may already have to pay the penalty for spilling a CSR VGPR. 
- if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) { - int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, - TargetStackID::SGPRSpill); - - if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) - llvm_unreachable("allocate SGPR spill should have worked"); - - FrameIndex = NewFI; - - LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); - dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane - << '\n'); - return; - } - - // 2: Next, try to save the FP/BP in an unused SGPR. - TempSGPR = findScratchNonCalleeSaveRegister( - MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); - - if (!TempSGPR) { - int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, - TargetStackID::SGPRSpill); - - if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { - // 3: There's no free lane to spill, and no free register to save FP/BP, - // so we're forced to spill another VGPR to use for the spill. - FrameIndex = NewFI; - - LLVM_DEBUG( - auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); - dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); + unsigned Size = TRI->getSpillSize(RC); + Align Alignment = TRI->getSpillAlign(RC); + + // We need to save and restore the given SGPR. + + Register ScratchSGPR; + // 1: Try to save the given register into an unused scratch SGPR. The LiveRegs + // should have all the callee saved registers marked as used. For certain + // cases we skip copy to scratch SGPR. + if (IncludeScratchCopy) + ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC); + + if (!ScratchSGPR) { + int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr, + TargetStackID::SGPRSpill); + + if (TRI->spillSGPRToVGPR() && + MFI->allocateSGPRSpillToVGPRLane(MF, FI, /* IsPrologEpilog */ true)) { + // 2: There's no free lane to spill, and no free register to save the + // SGPR, so we're forced to take another VGPR to use for the spill. + MFI->addToPrologEpilogSGPRSpills( + SGPR, PrologEpilogSGPRSaveRestoreInfo( + SGPRSaveKind::SPILL_TO_VGPR_LANE, FI)); + + LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front(); + dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " + << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane + << '\n';); } else { - // Remove dead index - MF.getFrameInfo().RemoveStackObject(NewFI); - // 4: If all else fails, spill the FP/BP to memory. - FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); - LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " - << (IsFP ? "FP" : "BP") << '\n'); + // Remove dead index + MF.getFrameInfo().RemoveStackObject(FI); + // 3: If all else fails, spill the register to memory. + FI = FrameInfo.CreateSpillStackObject(Size, Alignment); + MFI->addToPrologEpilogSGPRSpills( + SGPR, + PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI)); + LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling " + << printReg(SGPR, TRI) << '\n'); } } else { - LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? 
"FP" : "BP") << " with copy to " - << printReg(TempSGPR, TRI) << '\n'); + MFI->addToPrologEpilogSGPRSpills( + SGPR, PrologEpilogSGPRSaveRestoreInfo( + SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR)); + LiveRegs.addReg(ScratchSGPR); + LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to " + << printReg(ScratchSGPR, TRI) << '\n'); } } @@ -130,7 +201,8 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - Register SpillReg, int FI, int64_t DwordOff = 0) { + Register SpillReg, int FI, Register FrameReg, + int64_t DwordOff = 0) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -140,10 +212,11 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); LiveRegs.addReg(SpillReg); - TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true, - FuncInfo.getStackPtrOffsetReg(), DwordOff, MMO, - nullptr, &LiveRegs); - LiveRegs.removeReg(SpillReg); + bool IsKill = !MBB.isLiveIn(SpillReg); + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg, + DwordOff, MMO, nullptr, &LiveRegs); + if (IsKill) + LiveRegs.removeReg(SpillReg); } static void buildEpilogRestore(const GCNSubtarget &ST, @@ -152,7 +225,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - const DebugLoc &DL, Register SpillReg, int FI) { + const DebugLoc &DL, Register SpillReg, int FI, + Register FrameReg, int64_t DwordOff = 0) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; @@ -161,9 +235,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST, MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); - TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, - FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, - &LiveRegs); + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg, + DwordOff, MMO, nullptr, &LiveRegs); } static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -191,6 +264,251 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(GitPtrLo); } +static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, + const SIMachineFunctionInfo *FuncInfo, + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, bool IsProlog) { + if (LiveRegs.empty()) { + LiveRegs.init(TRI); + if (IsProlog) { + LiveRegs.addLiveIns(MBB); + } else { + // In epilog. + LiveRegs.addLiveOuts(MBB); + LiveRegs.stepBackward(*MBBI); + } + } +} + +namespace llvm { + +// SpillBuilder to save/restore special SGPR spills like the one needed for FP, +// BP, etc. These spills are delayed until the current function's frame is +// finalized. For a given register, the builder uses the +// PrologEpilogSGPRSaveRestoreInfo to decide the spill method. 
+class PrologEpilogSGPRSpillBuilder { + MachineBasicBlock::iterator MI; + MachineBasicBlock &MBB; + MachineFunction &MF; + const GCNSubtarget &ST; + MachineFrameInfo &MFI; + SIMachineFunctionInfo *FuncInfo; + const SIInstrInfo *TII; + const SIRegisterInfo &TRI; + const MCRegisterInfo *MCRI; + const SIFrameLowering *TFI; + Register SuperReg; + const PrologEpilogSGPRSaveRestoreInfo SI; + LivePhysRegs &LiveRegs; + const DebugLoc &DL; + Register FrameReg; + ArrayRef SplitParts; + unsigned NumSubRegs; + unsigned EltSize = 4; + bool IsFramePtrPrologSpill; + bool NeedsFrameMoves; + + bool isExec(Register Reg) const { + return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC; + } + + void saveToMemory(const int FI) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + assert(!MFI.isDeadObjectIndex(FI)); + + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); + + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + + for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) + .addReg(SubReg); + + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, + FI, FrameReg, DwordOff); + if (NeedsFrameMoves) { + if (isExec(SuperReg) && (I == NumSubRegs - 1)) + SubReg = AMDGPU::EXEC; + else if (IsFramePtrPrologSpill) + SubReg = FuncInfo->getFrameOffsetReg(); + + // FIXME: CFI for EXEC needs a fix by accurately computing the spill + // offset for both the low and high components. + if (SubReg != AMDGPU::EXEC_LO) + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(SubReg, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); + } + DwordOff += 4; + } + } + + void saveToVGPRLane(const int FI) const { + assert(!MFI.isDeadObjectIndex(FI)); + + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); + assert(Spill.size() == NumSubRegs); + + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR) + .addReg(SubReg) + .addImm(Spill[I].Lane) + .addReg(Spill[I].VGPR, RegState::Undef); + if (NeedsFrameMoves) { + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, AMDGPU::EXEC, Spill); + } else if (IsFramePtrPrologSpill) { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, + FuncInfo->getFrameOffsetReg(), + Spill[I].VGPR, Spill[I].Lane); + } else { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, SubReg, Spill[I].VGPR, + Spill[I].Lane); + } + } + } + } + + void copyToScratchSGPR(Register DstReg) const { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) + .addReg(SuperReg) + .setMIFlag(MachineInstr::FrameSetup); + if (NeedsFrameMoves) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(DstReg); + ArrayRef DstSplitParts = TRI.getRegSplitParts(RC, EltSize); + unsigned DstNumSubRegs = DstSplitParts.empty() ? 1 : DstSplitParts.size(); + assert(NumSubRegs == DstNumSubRegs); + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SrcSubReg = + NumSubRegs == 1 ? 
SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + Register DstSubReg = + NumSubRegs == 1 ? DstReg + : Register(TRI.getSubReg(DstReg, DstSplitParts[I])); + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForRegToSGPRPairSpill(MBB, MI, DL, AMDGPU::EXEC, + DstReg); + } else { + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createRegister( + nullptr, MCRI->getDwarfRegNum(SrcSubReg, false), + MCRI->getDwarfRegNum(DstSubReg, false))); + } + } + } + } + + void restoreFromMemory(const int FI) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + + for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR, + FI, FrameReg, DwordOff); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) + .addReg(TmpVGPR, RegState::Kill); + DwordOff += 4; + } + } + + void restoreFromVGPRLane(const int FI) { + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef Spill = + FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); + assert(Spill.size() == NumSubRegs); + + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SubReg = NumSubRegs == 1 + ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill[I].VGPR) + .addImm(Spill[I].Lane); + } + } + + void copyFromScratchSGPR(Register SrcReg) const { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg) + .addReg(SrcReg) + .setMIFlag(MachineInstr::FrameDestroy); + } + +public: + PrologEpilogSGPRSpillBuilder(Register Reg, + const PrologEpilogSGPRSaveRestoreInfo SI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, const SIInstrInfo *TII, + const SIRegisterInfo &TRI, + LivePhysRegs &LiveRegs, Register FrameReg, + bool IsFramePtrPrologSpill = false) + : MI(MI), MBB(MBB), MF(*MBB.getParent()), + ST(MF.getSubtarget()), MFI(MF.getFrameInfo()), + FuncInfo(MF.getInfo()), TII(TII), TRI(TRI), + MCRI(MF.getMMI().getContext().getRegisterInfo()), + TFI(ST.getFrameLowering()), SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), + DL(DL), FrameReg(FrameReg), + IsFramePtrPrologSpill(IsFramePtrPrologSpill) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); + SplitParts = TRI.getRegSplitParts(RC, EltSize); + NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + + // FIXME: Switch to using MF.needsFrameMoves() later. 
+ NeedsFrameMoves = true; + + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); + } + + void save() { + switch (SI.getKind()) { + case SGPRSaveKind::SPILL_TO_MEM: + return saveToMemory(SI.getIndex()); + case SGPRSaveKind::SPILL_TO_VGPR_LANE: + return saveToVGPRLane(SI.getIndex()); + case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: + return copyToScratchSGPR(SI.getReg()); + } + } + + void restore() { + switch (SI.getKind()) { + case SGPRSaveKind::SPILL_TO_MEM: + return restoreFromMemory(SI.getIndex()); + case SGPRSaveKind::SPILL_TO_VGPR_LANE: + return restoreFromVGPRLane(SI.getIndex()); + case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: + return copyFromScratchSGPR(SI.getReg()); + } + } +}; + +} // namespace llvm + // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` void SIFrameLowering::emitEntryFunctionFlatScratchInit( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -339,8 +657,6 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit( // memory. They should have been removed by now, except CFI Saved Reg spills. static bool allStackObjectsAreDead(const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; ++I) { @@ -348,7 +664,7 @@ static bool allStackObjectsAreDead(const MachineFunction &MF) { // determineCalleeSaves() might have added the SGPRSpill stack IDs for // CFI saves into scratch VGPR, ignore them if (MFI.getStackID(I) == TargetStackID::SGPRSpill && - TRI->isCFISavedRegsSpillEnabled() && I == FuncInfo->EXECSaveIndex) { + FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) { continue; } return false; @@ -447,18 +763,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock::iterator I = MBB.begin(); // FIXME: Switch to using MF.needsFrameMoves() later - const bool needsFrameMoves = true; + const bool NeedsFrameMoves = true; - if (needsFrameMoves) { + if (NeedsFrameMoves) { // On entry the SP/FP are not set up, so we need to define the CFA in terms // of a literal location expression. static const char CFAEncodedInst[] = { dwarf::DW_CFA_def_cfa_expression, 3, // length static_cast(dwarf::DW_OP_lit0), - static_cast( - dwarf::DW_OP_lit6), // DW_ASPACE_AMDGPU_private_wave FIXME: - // should be defined elsewhere + static_cast(dwarf::DW_OP_lit0 + DW_ASPACE_AMDGPU_private_wave), static_cast(dwarf::DW_OP_LLVM_form_aspace_address)}; buildCFI(MBB, I, DL, MCCFIInstruction::createEscape( @@ -725,22 +1039,6 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { llvm_unreachable("Invalid TargetStackID::Value"); } -static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, - const SIMachineFunctionInfo *FuncInfo, - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, bool IsProlog) { - if (LiveRegs.empty()) { - LiveRegs.init(TRI); - if (IsProlog) { - LiveRegs.addLiveIns(MBB); - } else { - // In epilog. 
- LiveRegs.addLiveOuts(MBB); - LiveRegs.stepBackward(*MBBI); - } - } -} - void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const { @@ -752,10 +1050,8 @@ void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, Register StackPtrReg = MF.getInfo()->getStackPtrOffsetReg(); - // DW_ASPACE_AMDGPU_private_wave FIXME: should be defined elsewhere - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createLLVMDefAspaceCfa( - nullptr, MCRI->getDwarfRegNum(StackPtrReg, false), 0, 6)); + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/true, + MachineInstr::FrameSetup); buildCFIForRegToSGPRPairSpill(MBB, MBBI, DL, AMDGPU::PC_REG, TRI.getReturnAddressReg(MF)); @@ -785,12 +1081,14 @@ void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, for_each(AMDGPU::SGPR_32RegClass.getRegisters(), ProcessReg); } -// Activate all lanes, returns saved exec. +// Activate only the inactive lanes when \p EnableInactiveLanes is true. +// Otherwise, activate all lanes. It returns the saved exec. static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, bool IsProlog) { + const DebugLoc &DL, bool IsProlog, + bool EnableInactiveLanes) { Register ScratchExecCopy; MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -807,103 +1105,190 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, LiveRegs.addReg(ScratchExecCopy); - const unsigned OrSaveExec = - ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; - auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy) - .addImm(-1); + const unsigned SaveExecOpc = + ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32 + : AMDGPU::S_OR_SAVEEXEC_B32) + : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64 + : AMDGPU::S_OR_SAVEEXEC_B64); + auto SaveExec = + BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1); SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. return ScratchExecCopy; } -// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. -// Otherwise we are spilling to memory. -static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill; -} - -void SIFrameLowering::emitCFISavedRegSpills(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - LivePhysRegs &LiveRegs, - bool emitSpillsToMem) const { +void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc &DL, LivePhysRegs &LiveRegs, + Register FrameReg, + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const { + SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); - SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch + // registers. However, save all lanes of callee-saved VGPRs. 
Due to this, we + // might end up flipping the EXEC bits twice. + Register ScratchExecCopy; + SmallVector, 2> WWMCalleeSavedRegs, WWMScratchRegs; + FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs); + if (!WWMScratchRegs.empty()) + ScratchExecCopy = + buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, + /*IsProlog*/ true, /*EnableInactiveLanes*/ true); + + auto StoreWWMRegisters = + [&](SmallVectorImpl> &WWMRegs) { + for (const auto &Reg : WWMRegs) { + Register VGPR = Reg.first; + int FI = Reg.second; + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + VGPR, FI, FrameReg); + if (NeedsFrameMoves) + // We spill the entire VGPR, so we can get away with just cfi_offset + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(VGPR, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); + } + }; + + StoreWWMRegisters(WWMScratchRegs); + if (!WWMCalleeSavedRegs.empty()) { + if (ScratchExecCopy) { + unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); + } else { + ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, + /*IsProlog*/ true, + /*EnableInactiveLanes*/ false); + } + } - Optional EXECSaveIndex = FuncInfo->EXECSaveIndex; - DebugLoc DL; + StoreWWMRegisters(WWMCalleeSavedRegs); + if (ScratchExecCopy) { + // FIXME: Split block and make terminator. + unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) + .addReg(ScratchExecCopy, RegState::Kill); + LiveRegs.addReg(ScratchExecCopy); + } - if (emitSpillsToMem) { - // EXEC mask is being spilled into memory at the frame - // index and consumes two double words in - // wave64 mode and one doble word in wave32 mode. And - // build the corresponding CFI rule. - if (EXECSaveIndex && spilledToMemory(MF, *EXECSaveIndex)) { - const int FI = *EXECSaveIndex; - assert(!MFI.isDeadObjectIndex(FI)); + Register FramePtrReg = FuncInfo->getFrameOffsetReg(); - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); + for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) { + // Special handle FP spill: + // Skip if FP is saved to a scratch SGPR, the save has already been emitted. + // Otherwise, FP has been moved to a temporary register and spill it + // instead. + bool IsFramePtrPrologSpill = Spill.first == FramePtrReg ? true : false; + Register Reg = IsFramePtrPrologSpill ? FramePtrRegScratchCopy : Spill.first; + if (!Reg) + continue; - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI, + LiveRegs, FrameReg, IsFramePtrPrologSpill); + SB.save(); + } - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(TRI.getSubReg(AMDGPU::EXEC, AMDGPU::sub0)); + // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make + // such scratch registers live throughout the function. 
+ SmallVector ScratchSGPRs; + FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs); + if (!ScratchSGPRs.empty()) { + for (MachineBasicBlock &MBB : MF) { + for (MCPhysReg Reg : ScratchSGPRs) + MBB.addLiveIn(Reg); - int DwordOff = 0; - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FI, DwordOff); + MBB.sortUniqueLiveIns(); + } + if (!LiveRegs.empty()) { + for (MCPhysReg Reg : ScratchSGPRs) + LiveRegs.addReg(Reg); + } + } - if (!ST.isWave32()) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(TRI.getSubReg(AMDGPU::EXEC, AMDGPU::sub1)); + // Remove the spill entry created for EXEC. It is needed only for CFISaves in + // the prologue. + if (TRI.isCFISavedRegsSpillEnabled()) + FuncInfo->removePrologEpilogSGPRSpillEntry(TRI.getExec()); +} - DwordOff = 4; - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, FI, DwordOff); - } +void SIFrameLowering::emitCSRSpillRestores( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs, + Register FrameReg, Register FramePtrRegScratchCopy) const { + const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + Register FramePtrReg = FuncInfo->getFrameOffsetReg(); - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset( - nullptr, MCRI->getDwarfRegNum(AMDGPU::EXEC, false), - MFI.getObjectOffset(FI) * ST.getWavefrontSize())); - } + for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) { + // Special handle FP restore: + // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore + // the FP value to a temporary register. The frame pointer should be + // overwritten only at the end when all other spills are restored from + // current frame. + Register Reg = + Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first; + if (!Reg) + continue; + + PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI, + LiveRegs, FrameReg); + SB.restore(); } - if (!emitSpillsToMem) { - - // EXEC mask is being spilled into free VGPR lanes and consumes - // two lanes in wave64 mode and one lane in wave32 mode, build - // the corresponding CFI rule. - if (EXECSaveIndex && !spilledToMemory(MF, *EXECSaveIndex)) { - ArrayRef EXECSpill = - FuncInfo->getSGPRToVGPRSpills(*EXECSaveIndex); - assert(EXECSpill.size()); - BuildMI(MBB, MBBI, DL, - TII->get(AMDGPU::V_WRITELANE_B32), - EXECSpill[0].VGPR) - .addReg(AMDGPU::EXEC_LO) - .addImm(EXECSpill[0].Lane) - .addReg(EXECSpill[0].VGPR, RegState::Undef); - if (!ST.isWave32()) { - assert(EXECSpill.size() == 2); - BuildMI(MBB, MBBI, DL, - TII->get(AMDGPU::V_WRITELANE_B32), - EXECSpill[1].VGPR) - .addReg(AMDGPU::EXEC_HI) - .addImm(EXECSpill[1].Lane) - .addReg(EXECSpill[1].VGPR, RegState::Undef); - } - buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, AMDGPU::EXEC, EXECSpill); + // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the + // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to + // this, we might end up flipping the EXEC bits twice. 
+ Register ScratchExecCopy; + SmallVector, 2> WWMCalleeSavedRegs, WWMScratchRegs; + FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs); + if (!WWMScratchRegs.empty()) + ScratchExecCopy = + buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, + /*IsProlog*/ false, /*EnableInactiveLanes*/ true); + + auto RestoreWWMRegisters = + [&](SmallVectorImpl> &WWMRegs) { + for (const auto &Reg : WWMRegs) { + Register VGPR = Reg.first; + int FI = Reg.second; + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + VGPR, FI, FrameReg); + } + }; + + RestoreWWMRegisters(WWMScratchRegs); + if (!WWMCalleeSavedRegs.empty()) { + if (ScratchExecCopy) { + unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); + } else { + ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, + /*IsProlog*/ false, + /*EnableInactiveLanes*/ false); } } + + RestoreWWMRegisters(WWMCalleeSavedRegs); + if (ScratchExecCopy) { + // FIXME: Split block and make terminator. + unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) + .addReg(ScratchExecCopy, RegState::Kill); + } } void SIFrameLowering::emitPrologue(MachineFunction &MF, @@ -915,11 +1300,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } MachineFrameInfo &MFI = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); - const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); Register FramePtrReg = FuncInfo->getFrameOffsetReg(); @@ -937,207 +1321,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, bool HasBP = false; uint32_t NumBytes = MFI.getStackSize(); uint32_t RoundedSize = NumBytes; - // To avoid clobbering VGPRs in lanes that weren't active on function entry, - // turn on all lanes before doing the spill to memory. 
- Register ScratchExecCopy; // FIXME: Switch to using MF.needsFrameMoves() later - const bool needsFrameMoves = true; + const bool NeedsFrameMoves = true; - if (needsFrameMoves) + if (NeedsFrameMoves) emitPrologueEntryCFI(MBB, MBBI, DL); - Optional FPSaveIndex = FuncInfo->FramePointerSaveIndex; - Optional BPSaveIndex = FuncInfo->BasePointerSaveIndex; - - // VGPRs used for SGPR->VGPR spills - for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg : - FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI) - continue; - - if (!ScratchExecCopy) - ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, - /*IsProlog*/ true); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR, - *Reg.FI); - - if (needsFrameMoves) - // We spill the entire VGPR, so we can get away with just cfi_offset - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset( - nullptr, MCRI->getDwarfRegNum(Reg.VGPR, false), - MFI.getObjectOffset(*Reg.FI) * ST.getWavefrontSize())); - } - - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = - buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ true); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - - if (ScratchExecCopy) { - // FIXME: Split block and make terminator. - unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) - .addReg(ScratchExecCopy, RegState::Kill); - LiveRegs.addReg(ScratchExecCopy); - } - - if (TRI.isCFISavedRegsSpillEnabled()) { - bool emitSpillsToMem = true; - emitCFISavedRegSpills(MF, MBB, MBBI, LiveRegs, emitSpillsToMem); - } - - if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(FramePtrReg); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FramePtrFI); - if (needsFrameMoves) - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset( - nullptr, MCRI->getDwarfRegNum(FramePtrReg, false), - MFI.getObjectOffset(FramePtrFI) * ST.getWavefrontSize())); - } + if (TRI.hasStackRealignment(MF)) + HasFP = true; - if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); + Register FramePtrRegScratchCopy; + if (!HasFP && !hasFP(MF)) { + // Emit the CSR spill stores with SP base register. + emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg, + FramePtrRegScratchCopy, NeedsFrameMoves); + } else { + // CSR spill stores will use FP as base register. + Register SGPRForFPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); + if (SGPRForFPSaveRestoreCopy) { + // Copy FP to the scratch register now and emit the CFI entry. It avoids + // the extra FP copy needed in the other two cases when FP is spilled to + // memory or to a VGPR lane. 
+ PrologEpilogSGPRSpillBuilder SB( + FramePtrReg, + FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI, + DL, TII, TRI, LiveRegs, FramePtrReg, + /*IsFramePtrPrologSpill*/ true); + SB.save(); + LiveRegs.addReg(SGPRForFPSaveRestoreCopy); + } else { + // Copy FP into a new scratch register so that its previous value can be + // spilled after setting up the new frame. + FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass); + if (!FramePtrRegScratchCopy) + report_fatal_error("failed to find free scratch register"); - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(BasePtrReg); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - BasePtrFI); - if (needsFrameMoves) - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset( - nullptr, MCRI->getDwarfRegNum(BasePtrReg, false), - MFI.getObjectOffset(BasePtrFI) * ST.getWavefrontSize())); - } - - if (TRI.isCFISavedRegsSpillEnabled()) { - bool emitSpillsToMem = false; - emitCFISavedRegSpills(MF, MBB, MBBI, LiveRegs, emitSpillsToMem); - } - - // In this case, spill the FP to a reserved VGPR. - if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); - assert(Spill.size() == 1); - - // Save FP before setting it up. - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(FramePtrReg) - .addImm(Spill[0].Lane) - .addReg(Spill[0].VGPR, RegState::Undef); - - if (needsFrameMoves) - buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, FramePtrReg, Spill[0].VGPR, - Spill[0].Lane); - } - - // In this case, spill the BP to a reserved VGPR. - if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); - - assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - - // Save BP before setting it up. - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(BasePtrReg) - .addImm(Spill[0].Lane) - .addReg(Spill[0].VGPR, RegState::Undef); - if (needsFrameMoves) - buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, BasePtrReg, Spill[0].VGPR, - Spill[0].Lane); - } - - // Emit the copy if we need an FP, and are using a free SGPR to save it. - if (FuncInfo->SGPRForFPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), - FuncInfo->SGPRForFPSaveRestoreCopy) - .addReg(FramePtrReg) - .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) - buildCFI( - MBB, MBBI, DL, - MCCFIInstruction::createRegister( - nullptr, MCRI->getDwarfRegNum(FramePtrReg, false), - MCRI->getDwarfRegNum(FuncInfo->SGPRForFPSaveRestoreCopy, false))); - } - - // Emit the copy if we need a BP, and are using a free SGPR to save it. 
- if (FuncInfo->SGPRForBPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), - FuncInfo->SGPRForBPSaveRestoreCopy) - .addReg(BasePtrReg) - .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) - buildCFI( - MBB, MBBI, DL, - MCCFIInstruction::createRegister( - nullptr, MCRI->getDwarfRegNum(BasePtrReg, false), - MCRI->getDwarfRegNum(FuncInfo->SGPRForBPSaveRestoreCopy, false))); - } - - // If a copy has been emitted for FP and/or BP, Make the SGPRs - // used in the copy instructions live throughout the function. - SmallVector TempSGPRs; - if (FuncInfo->SGPRForFPSaveRestoreCopy) - TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); - - if (FuncInfo->SGPRForBPSaveRestoreCopy) - TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); - - if (!TempSGPRs.empty()) { - for (MachineBasicBlock &MBB : MF) { - for (MCPhysReg Reg : TempSGPRs) - MBB.addLiveIn(Reg); - - MBB.sortUniqueLiveIns(); - } - if (!LiveRegs.empty()) { - LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); - LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); + LiveRegs.addReg(FramePtrRegScratchCopy); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy) + .addReg(FramePtrReg); } } - if (TRI.hasStackRealignment(MF)) { - HasFP = true; + if (HasFP) { // Needs stack realignment. const unsigned Alignment = MFI.getMaxAlign().value(); RoundedSize += Alignment; @@ -1164,6 +1394,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // If FP is used, emit the CSR spills with FP base register. + if (HasFP) { + emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg, + FramePtrRegScratchCopy, NeedsFrameMoves); + if (FramePtrRegScratchCopy) + LiveRegs.removeReg(FramePtrRegScratchCopy); + } // If we need a base pointer, set it up here. It's whatever the value of // the stack pointer is at this point. Any variable size objects will be // allocated after this, so we can still use the base pointer to reference @@ -1175,10 +1412,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { - if (needsFrameMoves) - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister( - nullptr, MCRI->getDwarfRegNum(FramePtrReg, false))); + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, FramePtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameSetup); } if (HasFP && RoundedSize != 0) { @@ -1189,24 +1425,20 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, Add->getOperand(3).setIsDead(); // Mark SCC as dead. } - assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || - FuncInfo->FramePointerSaveIndex)) && + bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); + assert((!HasFP || FPSaved) && "Needed to save FP but didn't save it anywhere"); // If we allow spilling to AGPRs we may have saved FP but then spill // everything into AGPRs instead of the stack. 
- assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && - !FuncInfo->FramePointerSaveIndex) || - EnableSpillVGPRToAGPR) && + assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) && "Saved FP but didn't need it"); - assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || - FuncInfo->BasePointerSaveIndex)) && + bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg); + assert((!HasBP || BPSaved) && "Needed to save BP but didn't save it anywhere"); - assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && - !FuncInfo->BasePointerSaveIndex)) && - "Saved BP but didn't need it"); + assert((HasBP || !BPSaved) && "Saved BP but didn't need it"); } void SIFrameLowering::emitEpilogue(MachineFunction &MF, @@ -1217,9 +1449,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); LivePhysRegs LiveRegs; // Get the insert location for the epilogue. If there were no terminators in // the block, get the last instruction. @@ -1239,12 +1470,32 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, ? NumBytes + MFI.getMaxAlign().value() : NumBytes; const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); - const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); - const Register BasePtrReg = - TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); + Register FramePtrReg = FuncInfo->getFrameOffsetReg(); + bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); + + Register FramePtrRegScratchCopy; + Register SGPRForFPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); + if (FPSaved) { + // CSR spill restores should use FP as base register. If + // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP + // into a new scratch register and copy to FP later when other registers are + // restored from the current stack frame. + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); + if (SGPRForFPSaveRestoreCopy) { + LiveRegs.addReg(SGPRForFPSaveRestoreCopy); + } else { + FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass); + if (!FramePtrRegScratchCopy) + report_fatal_error("failed to find free scratch register"); - Optional FPSaveIndex = FuncInfo->FramePointerSaveIndex; - Optional BPSaveIndex = FuncInfo->BasePointerSaveIndex; + LiveRegs.addReg(FramePtrRegScratchCopy); + } + + emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg, + FramePtrRegScratchCopy); + } if (RoundedSize != 0 && hasFP(MF)) { auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) @@ -1254,126 +1505,39 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, Add->getOperand(3).setIsDead(); // Mark SCC as dead. 
} - if (FuncInfo->SGPRForFPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) - .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) - .setMIFlag(MachineInstr::FrameDestroy); - } - - if (FuncInfo->SGPRForBPSaveRestoreCopy) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) - .addReg(FuncInfo->SGPRForBPSaveRestoreCopy) - .setMIFlag(MachineInstr::FrameDestroy); - } - - if (FPSaveIndex) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - if (spilledToMemory(MF, FramePtrFI)) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, FramePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } - } - // FIXME: Switch to using MF.needsFrameMoves() later - const bool needsFrameMoves = true; + const bool NeedsFrameMoves = true; if (hasFP(MF)) { - if (needsFrameMoves) - buildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister( - nullptr, MCRI->getDwarfRegNum(StackPtrReg, false)), - MachineInstr::FrameDestroy); - } - - if (BPSaveIndex) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); - if (spilledToMemory(MF, BasePtrFI)) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, BasePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. - assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } - } - - Register ScratchExecCopy; - for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg : - FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI) - continue; - - if (!ScratchExecCopy) - ScratchExecCopy = - buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false); - - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - Reg.VGPR, *Reg.FI); - } - - for (auto ReservedWWM : FuncInfo->wwmAllocation()) { - if (!ScratchExecCopy) - ScratchExecCopy = - buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false); - - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); - } - - if (ScratchExecCopy) { - // FIXME: Split block and make terminator. - unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - MCRegister Exec = ST.isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; - BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) - .addReg(ScratchExecCopy, RegState::Kill); + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameDestroy); + } + + if (FPSaved) { + // Insert the copy to restore FP. + Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy + : FramePtrRegScratchCopy; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) + .addReg(SrcReg); + if (SGPRForFPSaveRestoreCopy) + MIB.setMIFlag(MachineInstr::FrameDestroy); + } else { + // Insert the CSR spill restores with SP as the base register. + emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg, + FramePtrRegScratchCopy); } } #ifndef NDEBUG static bool allSGPRSpillsAreDead(const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; ++I) { if (!MFI.isDeadObjectIndex(I) && MFI.getStackID(I) == TargetStackID::SGPRSpill && - (I != FuncInfo->FramePointerSaveIndex && - I != FuncInfo->BasePointerSaveIndex && - (!TRI->isCFISavedRegsSpillEnabled() || - I != FuncInfo->EXECSaveIndex))) { + !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) { return false; } } @@ -1402,9 +1566,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + // Allocate spill slots for WWM reserved VGPRs. if (!FuncInfo->isEntryFunction()) { - // Spill VGPRs used for Whole Wave Mode - FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); + for (Register Reg : FuncInfo->getWWMReservedRegs()) { + const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); + FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC)); + } } const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() @@ -1433,8 +1601,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg))) { - // FIXME: change to enterBasicBlockEnd() - RS->enterBasicBlock(MBB); + RS->enterBasicBlockEnd(MBB); + RS->backward(MI); TRI->eliminateFrameIndex(MI, 0, FIOp, RS); SpillFIs.set(FI); continue; @@ -1509,35 +1677,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } -static void allocateCFISave(MachineFunction &MF, Optional &FI, - Register Reg) { - SIMachineFunctionInfo *MFI = MF.getInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - if (MFI->haveFreeLanesForSGPRSpill(MF, TRI->getSpillSize(*RC) / 4)) { - int NewFI = MF.getFrameInfo().CreateStackObject( - TRI->getSpillSize(*RC), TRI->getSpillAlign(*RC), true, nullptr, - TargetStackID::SGPRSpill); - if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { - FI = NewFI; - } - } else { - int NewFI = MF.getFrameInfo().CreateStackObject( - TRI->getSpillSize(*RC), TRI->getSpillAlign(*RC), true, nullptr, - TargetStackID::SGPRSpill); - if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { - FI = NewFI; - } else { - // Remove dead index - MF.getFrameInfo().RemoveStackObject(NewFI); - FI = MF.getFrameInfo().CreateSpillStackObject( - 
TRI->getSpillSize(*RC), Align(TRI->getSpillAlign(*RC))); - } - } - return; -} - void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS) const { const GCNSubtarget &ST = MF.getSubtarget(); @@ -1564,31 +1703,56 @@ void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced( } } -// Only report VGPRs to generic code. -void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, - BitVector &SavedVGPRs, - RegScavenger *RS) const { - TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); - SIMachineFunctionInfo *MFI = MF.getInfo(); - if (MFI->isEntryFunction()) - return; - +// The special SGPR spills like the one needed for FP, BP or any reserved +// registers delayed until frame lowering. +void SIFrameLowering::determinePrologEpilogSGPRSaves( + MachineFunction &MF, BitVector &SavedVGPRs, + bool NeedExecCopyReservedReg) const { MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - - // Ignore the SGPRs the default implementation found. - SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); - - // Do not save AGPRs prior to GFX90A because there was no easy way to do so. - // In gfx908 there was do AGPR loads and stores and thus spilling also - // require a temporary VGPR. - if (!ST.hasGFX90AInsts()) - SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); + LivePhysRegs LiveRegs; + LiveRegs.init(*TRI); + // Initially mark callee saved registers as used so we will not choose them + // while looking for scratch SGPRs. + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + for (unsigned I = 0; CSRegs[I]; ++I) + LiveRegs.addReg(CSRegs[I]); + + const TargetRegisterClass &RC = ST.isWave32() + ? AMDGPU::SReg_32_XM0_XEXECRegClass + : AMDGPU::SGPR_64RegClass; + + if (NeedExecCopyReservedReg) { + Register ReservedReg = MFI->getSGPRForEXECCopy(); + assert(ReservedReg && "Should have reserved an SGPR for EXEC copy."); + Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC); + if (UnusedScratchReg) { + // If found any unused scratch SGPR, reserve the register itself for Exec + // copy and there is no need for any spills in that case. + MFI->setSGPRForEXECCopy(UnusedScratchReg); + LiveRegs.addReg(UnusedScratchReg); + } else { + // Needs spill. + assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) && + "Re-reserving spill slot for EXEC copy register"); + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC, + /* IncludeScratchCopy */ false); + } + } if (TRI->isCFISavedRegsSpillEnabled()) { - allocateCFISave(MF, MFI->EXECSaveIndex, - ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC); + Register Exec = TRI->getExec(); + assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) && + "Re-reserving spill slot for EXEC"); + // FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to + // the scratch as we emit it only in the prolog. This optimization should + // not happen for frame related instructions. Until this is fixed ignore + // copy to scratch SGPR. + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, Exec, RC, + /* IncludeScratchCopy */ false); } // hasFP only knows about stack objects that already exist. 
We're now @@ -1602,29 +1766,78 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, const bool WillHaveFP = FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(MF)); - // VGPRs used for SGPR spilling need to be specially inserted in the prolog, - // so don't allow the default insertion to handle them. - for (auto SSpill : MFI->getSGPRSpillVGPRs()) - SavedVGPRs.reset(SSpill.VGPR); - - LivePhysRegs LiveRegs; - LiveRegs.init(*TRI); - if (WillHaveFP || hasFP(MF)) { - assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex && + Register FramePtrReg = MFI->getFrameOffsetReg(); + assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) && "Re-reserving spill slot for FP"); - getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, - MFI->FramePointerSaveIndex, true); + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg); } if (TRI->hasBasePointer(MF)) { - if (MFI->SGPRForFPSaveRestoreCopy) - LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); + Register BasePtrReg = TRI->getBaseRegister(); + assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) && + "Re-reserving spill slot for BP"); + getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg); + } +} + +// Only report VGPRs to generic code. +void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedVGPRs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); + SIMachineFunctionInfo *MFI = MF.getInfo(); + if (MFI->isEntryFunction()) + return; + + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + bool NeedExecCopyReservedReg = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // WRITELANE instructions used for SGPR spills can overwrite the inactive + // lanes of VGPRs and callee must spill and restore them even if they are + // marked Caller-saved. + + // TODO: Handle this elsewhere at an early point. Walking through all MBBs + // here would be a bad heuristic. A better way should be by calling + // allocateWWMSpill during the regalloc pipeline whenever a physical + // register is allocated for the intended virtual registers. That will + // also help excluding the general use of WRITELANE/READLANE intrinsics + // that won't really need any such special handling. + if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32) + MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg()); + else if (MI.getOpcode() == AMDGPU::V_READLANE_B32) + MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg()); + else if (TII->isWWMRegSpillOpcode(MI.getOpcode())) + NeedExecCopyReservedReg = true; + } + } + + // Ignore the SGPRs the default implementation found. + SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); + + // Do not save AGPRs prior to GFX90A because there was no easy way to do so. + // In gfx908 there was do AGPR loads and stores and thus spilling also + // require a temporary VGPR. + if (!ST.hasGFX90AInsts()) + SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); + + determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg); + + // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't + // allow the default insertion to handle them. 
+ for (auto &Reg : MFI->getWWMSpills()) + SavedVGPRs.reset(Reg.first); - assert(!MFI->SGPRForBPSaveRestoreCopy && - !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP"); - getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, - MFI->BasePointerSaveIndex, false); + // Mark all lane VGPRs as BB LiveIns. + for (MachineBasicBlock &MBB : MF) { + for (auto &Reg : MFI->getWWMSpills()) + MBB.addLiveIn(Reg.first); + + MBB.sortUniqueLiveIns(); } } @@ -1680,29 +1893,31 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots( return true; // Early exit if no callee saved registers are modified! const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - if (!FuncInfo->SGPRForFPSaveRestoreCopy && - !FuncInfo->SGPRForBPSaveRestoreCopy) - return false; - const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *RI = ST.getRegisterInfo(); Register FramePtrReg = FuncInfo->getFrameOffsetReg(); Register BasePtrReg = RI->getBaseRegister(); + Register SGPRForFPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); + Register SGPRForBPSaveRestoreCopy = + FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg); + if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy) + return false; + unsigned NumModifiedRegs = 0; - if (FuncInfo->SGPRForFPSaveRestoreCopy) + if (SGPRForFPSaveRestoreCopy) NumModifiedRegs++; - if (FuncInfo->SGPRForBPSaveRestoreCopy) + if (SGPRForBPSaveRestoreCopy) NumModifiedRegs++; for (auto &CS : CSI) { - if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { - CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); + if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) { + CS.setDstReg(SGPRForFPSaveRestoreCopy); if (--NumModifiedRegs) break; - } else if (CS.getReg() == BasePtrReg && - FuncInfo->SGPRForBPSaveRestoreCopy) { - CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); + } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) { + CS.setDstReg(SGPRForBPSaveRestoreCopy); if (--NumModifiedRegs) break; } @@ -1846,9 +2061,8 @@ bool SIFrameLowering::spillCalleeSavedRegisters( unsigned Reg = CS.getReg(); if (CS.isSpilledToReg()) { - BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), - CS.getDstReg()) - .addReg(Reg, getKillRegState(true)); + TII->buildCopy(MBB, MBBI, DebugLoc(), CS.getDstReg(), Reg, + getKillRegState(true)); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( Reg, Reg == RI->getReturnAddressReg(MF) ? 
MVT::i64 : MVT::i32); @@ -1889,19 +2103,6 @@ MachineInstr *SIFrameLowering::buildCFIForRegToRegSpill( MCRI.getDwarfRegNum(RegCopy, false))); } -static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { - if (DwarfReg < 32) { - OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); - } else { - OS << uint8_t(dwarf::DW_OP_regx); - encodeULEB128(DwarfReg, OS); - } -} - -static constexpr unsigned SGPRBitSize = 32; -static constexpr unsigned SGPRByteSize = SGPRBitSize / 8; -static constexpr unsigned VGPRLaneBitSize = 32; - MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const Register SGPR, const Register VGPR, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 62ab54a7fae48..b8a94048a6ed2 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -36,6 +36,17 @@ class SIFrameLowering final : public AMDGPUFrameLowering { RegScavenger *RS = nullptr) const override; void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const; + void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, + bool NeedExecCopyReservedReg) const; + void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc &DL, + LivePhysRegs &LiveRegs, Register FrameReg, + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const; + void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc &DL, + LivePhysRegs &LiveRegs, Register FrameReg, + Register FramePtrRegScratchCopy) const; bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -82,6 +93,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering { MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const; + void emitDefCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const; + public: bool hasFP(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 24ef9fdb7b8cf..7a84a3c1caeb1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -961,17 +961,11 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, if (Attr.hasFnAttr(Attribute::ReadNone)) return false; - SIMachineFunctionInfo *MFI = MF.getInfo(); - - const GCNTargetMachine &TM = - static_cast(getTargetMachine()); + // TODO: Should images get their own address space? 
+ Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER; - if (RsrcIntr->IsImage) { - Info.ptrVal = MFI->getImagePSV(TM); + if (RsrcIntr->IsImage) Info.align.reset(); - } else { - Info.ptrVal = MFI->getBufferPSV(TM); - } Info.flags |= MachineMemOperand::MODereferenceable; if (Attr.hasFnAttr(Attribute::ReadOnly)) { @@ -1057,14 +1051,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::amdgcn_buffer_atomic_fadd: { - SIMachineFunctionInfo *MFI = MF.getInfo(); - - const GCNTargetMachine &TM = - static_cast(getTargetMachine()); - Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getOperand(0)->getType()); - Info.ptrVal = MFI->getBufferPSV(TM); + Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER; Info.align.reset(); Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -1099,14 +1088,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::amdgcn_image_bvh_intersect_ray: { - SIMachineFunctionInfo *MFI = MF.getInfo(); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT? - const GCNTargetMachine &TM = - static_cast(getTargetMachine()); - - Info.ptrVal = MFI->getImagePSV(TM); + Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER; Info.align.reset(); Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable; @@ -2058,7 +2043,9 @@ void SITargetLowering::allocateSpecialInputSGPRs( if (Info.hasDispatchPtr()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && + AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a @@ -2108,7 +2095,9 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(DispatchPtrReg); } - if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) { + const Module *M = MF.getFunction().getParent(); + if (Info.hasQueuePtr() && + AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) { Register QueuePtrReg = Info.addQueuePtr(TRI); MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(QueuePtrReg); @@ -2342,14 +2331,11 @@ void SITargetLowering::insertCopiesSplitCSR( Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. Entry->addLiveIn(*I); - BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) - .addReg(*I); + TII->buildCopy(*Entry, MBBI, DebugLoc(), NewVR, *I); // Insert the copy-back instructions right before the terminator. 
for (auto *Exit : Exits) - BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), *I) - .addReg(NewVR); + TII->buildCopy(*Exit, Exit->getFirstTerminator(), DebugLoc(), *I, NewVR); } } @@ -4254,8 +4240,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); Register SrcCondCopy = MRI.createVirtualRegister(CondRC); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) - .addReg(SrcCond); + BuildMI(*BB, MI, DL, TII->get(TII->getCopyOpcode()), SrcCondCopy) + .addReg(SrcCond); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) .addImm(0) .addReg(Src0, 0, AMDGPU::sub0) @@ -5388,19 +5374,13 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) return lowerTrapEndpgm(Op, DAG); - if (Optional HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) { - switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: - return lowerTrapHsaQueuePtr(Op, DAG); - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: - return Subtarget->supportsGetDoorbellID() ? - lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG); - } - } + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M); + if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3) + return lowerTrapHsaQueuePtr(Op, DAG); - llvm_unreachable("Unknown trap handler"); + return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) : + lowerTrapHsaQueuePtr(Op, DAG); } SDValue SITargetLowering::lowerTrapEndpgm( @@ -5428,7 +5408,8 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr( SDValue QueuePtr; // For code object version 5, QueuePtr is passed through implicit kernarg. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { QueuePtr = loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR); } else { @@ -5522,7 +5503,8 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // For code object version 5, private_base and shared_base are passed through // implicit kernargs. - if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) { + const Module *M = DAG.getMachineFunction().getFunction().getParent(); + if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) { ImplicitParameter Param = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 
SHARED_BASE : PRIVATE_BASE; return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param); @@ -7819,6 +7801,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, }; unsigned Opcode = 0; switch (IntrID) { + if (!Subtarget->hasAtomicFaddNoRtnInsts()) + return makeV_ILLEGAL(Op, DAG); + LLVM_FALLTHROUGH; case Intrinsic::amdgcn_global_atomic_fmin: case Intrinsic::amdgcn_flat_atomic_fmin: { Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN; @@ -10715,16 +10700,19 @@ SDValue SITargetLowering::performExtractVectorEltCombine( SelectionDAG &DAG = DCI.DAG; EVT VecVT = Vec.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); + EVT VecEltVT = VecVT.getVectorElementType(); + EVT ResVT = N->getValueType(0); + + unsigned VecSize = VecVT.getSizeInBits(); + unsigned VecEltSize = VecEltVT.getSizeInBits(); if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) && allUsesHaveSourceMods(N)) { SDLoc SL(N); - EVT EltVT = N->getValueType(0); SDValue Idx = N->getOperand(1); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Vec.getOperand(0), Idx); - return DAG.getNode(Vec.getOpcode(), SL, EltVT, Elt); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx); + return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt); } // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx) @@ -10732,9 +10720,8 @@ SDValue SITargetLowering::performExtractVectorEltCombine( // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx) // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx) // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt - if (Vec.hasOneUse() && DCI.isBeforeLegalize()) { + if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) { SDLoc SL(N); - EVT EltVT = N->getValueType(0); SDValue Idx = N->getOperand(1); unsigned Opc = Vec.getOpcode(); @@ -10754,21 +10741,18 @@ SDValue SITargetLowering::performExtractVectorEltCombine( case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: case ISD::FMINNUM_IEEE: { - SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx); - SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(1), Idx); DCI.AddToWorklist(Elt0.getNode()); DCI.AddToWorklist(Elt1.getNode()); - return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags()); + return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags()); } } } - unsigned VecSize = VecVT.getSizeInBits(); - unsigned EltSize = EltVT.getSizeInBits(); - // EXTRACT_VECTOR_ELT (, var-idx) => n x select (e, const-idx) if (shouldExpandVectorDynExt(N)) { SDLoc SL(N); @@ -10776,7 +10760,7 @@ SDValue SITargetLowering::performExtractVectorEltCombine( SDValue V; for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) { SDValue IC = DAG.getVectorIdxConstant(I, SL); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC); if (I == 0) V = Elt; else @@ -10792,15 +10776,11 @@ SDValue SITargetLowering::performExtractVectorEltCombine( // elements. This exposes more load reduction opportunities by replacing // multiple small extract_vector_elements with a single 32-bit extract. 
auto *Idx = dyn_cast(N->getOperand(1)); - if (isa(Vec) && - EltSize <= 16 && - EltVT.isByteSized() && - VecSize > 32 && - VecSize % 32 == 0 && - Idx) { + if (isa(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() && + VecSize > 32 && VecSize % 32 == 0 && Idx) { EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT); - unsigned BitIndex = Idx->getZExtValue() * EltSize; + unsigned BitIndex = Idx->getZExtValue() * VecEltSize; unsigned EltIdx = BitIndex / 32; unsigned LeftoverBitIdx = BitIndex % 32; SDLoc SL(N); @@ -10815,9 +10795,16 @@ SDValue SITargetLowering::performExtractVectorEltCombine( DAG.getConstant(LeftoverBitIdx, SL, MVT::i32)); DCI.AddToWorklist(Srl.getNode()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, EltVT.changeTypeToInteger(), Srl); + EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger(); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl); DCI.AddToWorklist(Trunc.getNode()); - return DAG.getNode(ISD::BITCAST, SL, EltVT, Trunc); + + if (VecEltVT == ResVT) { + return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc); + } + + assert(ResVT.isScalarInteger()); + return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT); } return SDValue(); @@ -11761,6 +11748,7 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node, MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node), NewVTList, Ops); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); if (HasChain) { // Update chain. @@ -11770,9 +11758,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node, if (NewChannels == 1) { assert(Node->hasNUsesOfValue(1, 0)); - SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY, - SDLoc(Node), Users[Lane]->getValueType(0), - SDValue(NewNode, 0)); + SDNode *Copy = + DAG.getMachineNode(TII->getCopyOpcode(), SDLoc(Node), + Users[Lane]->getValueType(0), SDValue(NewNode, 0)); DAG.ReplaceAllUsesWith(Users[Lane], Copy); return nullptr; } @@ -12502,6 +12490,14 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { } } + // Reserve the SGPR(s) to save/restore EXEC for WWM spill/copy handling. + unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); + Register SReg = ST.isWave32() + ? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1) + : TRI->getAlignedHighSGPRForRC(MF, /*Align=*/2, + &AMDGPU::SGPR_64RegClass); + Info->setSGPRForEXECCopy(SReg); + TargetLoweringBase::finalizeLowering(MF); } @@ -12837,7 +12833,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // results for certain memory destinations. if (RMW->getFunction() ->getFnAttribute("amdgpu-unsafe-fp-atomics") - .getValueAsString() != "true") + .getValueAsString() != "true") return AtomicExpansionKind::CmpXChg; // Always expand system scope fp atomics. 
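The index arithmetic in the sub-dword extract_vector_elt combine above (BitIndex, EltIdx, LeftoverBitIdx) maps a narrow element index onto a 32-bit word plus a shift amount. A standalone sketch of that mapping, separate from the actual SelectionDAG code:

#include <cassert>
#include <cstdio>

// Illustrative only: same math as the combine, assuming a byte-sized vector
// element type of at most 16 bits.
struct SubDwordExtract {
  unsigned WordIndex;   // which 32-bit element of the bitcast vector to read
  unsigned ShiftAmount; // right shift applied before the truncate
};

SubDwordExtract mapExtract(unsigned EltIdx, unsigned EltSizeInBits) {
  assert(EltSizeInBits <= 16 && EltSizeInBits % 8 == 0);
  unsigned BitIndex = EltIdx * EltSizeInBits;
  return {BitIndex / 32, BitIndex % 32};
}

int main() {
  // Element 5 of a loaded v8i8: 32-bit word 1, shift right by 8, then trunc.
  SubDwordExtract E = mapExtract(5, 8);
  std::printf("word %u, shift %u\n", E.WordIndex, E.ShiftAmount);
  return 0;
}

With this patch the truncated value is additionally any-extended or truncated when the requested result type is a scalar integer wider than the vector element type, instead of requiring the two types to match.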
@@ -13039,28 +13035,3 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const { return MONoClobber; return MachineMemOperand::MONone; } - -bool SITargetLowering::checkForPhysRegDependency( - SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) const { - if (User->getOpcode() != ISD::CopyToReg) - return false; - if (!Def->isMachineOpcode()) - return false; - MachineSDNode *MDef = dyn_cast(Def); - if (!MDef) - return false; - - unsigned ResNo = User->getOperand(Op).getResNo(); - if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1) - return false; - const MCInstrDesc &II = TII->get(MDef->getMachineOpcode()); - if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) { - PhysReg = AMDGPU::SCC; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo)); - Cost = RC->getCopyCost(); - return true; - } - return false; -} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9e8ff565fe6bd..81bd8dedc303d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -479,11 +479,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; bool denormalsEnabledForType(LLT Ty, MachineFunction &MF) const; - bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, unsigned &PhysReg, - int &Cost) const override; - bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN = false, diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5e0ae4c2581f6..918f58eb57a48 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -141,8 +141,13 @@ enum VmemType { VMEM_BVH }; +static bool updateVMCntOnly(const MachineInstr &Inst) { + return SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLATGlobal(Inst) || + SIInstrInfo::isFLATScratch(Inst); +} + VmemType getVmemType(const MachineInstr &Inst) { - assert(SIInstrInfo::isVMEM(Inst)); + assert(updateVMCntOnly(Inst)); if (!SIInstrInfo::isMIMG(Inst)) return VMEM_NOSAMPLER; const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode()); @@ -681,7 +686,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, if (T == VM_CNT) { if (Interval.first >= NUM_ALL_VGPRS) continue; - if (SIInstrInfo::isVMEM(Inst)) { + if (updateVMCntOnly(Inst)) { VmemType V = getVmemType(Inst); for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) VgprVmemTypes[RegNo] |= 1 << V; @@ -1175,7 +1180,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // previous write and this write are the same type of VMEM // instruction, in which case they're guaranteed to write their // results in order anyway. - if (Op.isUse() || !SIInstrInfo::isVMEM(MI) || + if (Op.isUse() || !updateVMCntOnly(MI) || ScoreBrackets.hasOtherPendingVmemTypes(RegNo, getVmemType(MI))) { ScoreBrackets.determineWait(VM_CNT, RegNo, Wait); @@ -1191,12 +1196,12 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, } } - // The subtarget may have an implicit S_WAITCNT 0 before barriers. If it does - // not, we need to ensure the subtarget is capable of backing off barrier - // instructions in case there are any outstanding memory operations that may - // cause an exception. 
Otherwise, insert an explicit S_WAITCNT 0 here. + // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0 + // occurs before the instruction. Doing it here prevents any additional + // S_WAITCNTs from being emitted if the instruction was marked as + // requiring a WAITCNT beforehand. if (MI.getOpcode() == AMDGPU::S_BARRIER && - !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) { + !ST->hasAutoWaitcntBeforeBarrier()) { Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d61db5ca62d98..42e94dbde83bf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -64,8 +64,10 @@ static cl::opt Fix16BitCopies( cl::ReallyHidden); SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) - : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), - RI(ST), ST(ST) { + : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN, + /* CatchRetOpcode */ ~0u, /* ReturnOpcode */ ~0u, + AMDGPU::PRED_COPY), + RI(ST), ST(ST) { SchedModel.init(&ST); } @@ -159,6 +161,11 @@ static bool resultDependsOnExec(const MachineInstr &MI) { default: break; case AMDGPU::V_READFIRSTLANE_B32: + case AMDGPU::V_CNDMASK_B64_PSEUDO: + case AMDGPU::V_CNDMASK_B32_dpp: + case AMDGPU::V_CNDMASK_B32_e32: + case AMDGPU::V_CNDMASK_B32_e64: + case AMDGPU::V_CNDMASK_B32_sdwa: return true; } @@ -557,7 +564,7 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, - RegScavenger &RS, + RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg = Register(), Register ImpUseSuperReg = Register()) { assert((TII.getSubtarget().hasMAIInsts() && @@ -574,42 +581,47 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII, const SIRegisterInfo &RI = TII.getRegisterInfo(); // First try to find defining accvgpr_write to avoid temporary registers. - for (auto Def = MI, E = MBB.begin(); Def != E; ) { - --Def; - if (!Def->definesRegister(SrcReg, &RI)) - continue; - if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) - break; + // In the case of copies of overlapping AGPRs, we conservatively do not + // reuse previous accvgpr_writes. Otherwise, we may incorrectly pick up + // an accvgpr_write used for this same copy due to implicit-defs + if (!RegsOverlap) { + for (auto Def = MI, E = MBB.begin(); Def != E; ) { + --Def; + if (!Def->definesRegister(SrcReg, &RI)) + continue; + if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) + break; - MachineOperand &DefOp = Def->getOperand(1); - assert(DefOp.isReg() || DefOp.isImm()); + MachineOperand &DefOp = Def->getOperand(1); + assert(DefOp.isReg() || DefOp.isImm()); - if (DefOp.isReg()) { - // Check that register source operand if not clobbered before MI. - // Immediate operands are always safe to propagate. - bool SafeToPropagate = true; - for (auto I = Def; I != MI && SafeToPropagate; ++I) - if (I->modifiesRegister(DefOp.getReg(), &RI)) - SafeToPropagate = false; + if (DefOp.isReg()) { + bool SafeToPropagate = true; + // Check that register source operand is not clobbered before MI. + // Immediate operands are always safe to propagate. 
+ for (auto I = Def; I != MI && SafeToPropagate; ++I) + if (I->modifiesRegister(DefOp.getReg(), &RI)) + SafeToPropagate = false; - if (!SafeToPropagate) - break; + if (!SafeToPropagate) + break; - DefOp.setIsKill(false); - } + DefOp.setIsKill(false); + } - MachineInstrBuilder Builder = - BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) - .add(DefOp); - if (ImpDefSuperReg) - Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); + MachineInstrBuilder Builder = + BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) + .add(DefOp); + if (ImpDefSuperReg) + Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit); - if (ImpUseSuperReg) { - Builder.addReg(ImpUseSuperReg, - getKillRegState(KillSrc) | RegState::Implicit); - } + if (ImpUseSuperReg) { + Builder.addReg(ImpUseSuperReg, + getKillRegState(KillSrc) | RegState::Implicit); + } - return; + return; + } } RS.enterBasicBlock(MBB); @@ -851,7 +863,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // FIXME: Pass should maintain scavenger to avoid scan through the block on // every AGPR spill. RegScavenger RS; - indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS); + const bool Overlap = RI.regsOverlap(SrcReg, DestReg); + indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS, Overlap); return; } @@ -995,7 +1008,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // If there is an overlap, we can't kill the super-register on the last // instruction, since it will also kill the components made live by this def. - const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); + const bool Overlap = RI.regsOverlap(SrcReg, DestReg); + const bool CanKillSuperReg = KillSrc && !Overlap; for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { unsigned SubIdx; @@ -1010,7 +1024,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Register ImpDefSuper = Idx == 0 ? 
Register(DestReg) : Register(); Register ImpUseSuper = SrcReg; indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), - RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, + RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap, ImpDefSuper, ImpUseSuper); } else if (Opcode == AMDGPU::V_PK_MOV_B32) { Register DstSubReg = RI.getSubReg(DestReg, SubIdx); @@ -1134,8 +1148,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, if (Cond.size() == 1) { Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(Cond[0]); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(FalseReg) @@ -1177,8 +1190,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(RegOp); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(FalseReg) @@ -1191,8 +1203,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); Register SReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) - .add(RegOp); + BuildMI(MBB, I, DL, get(getCopyOpcode()), SReg).add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addImm(0) .addReg(TrueReg) @@ -1275,7 +1286,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { if (RI.isAGPRClass(DstRC)) - return AMDGPU::COPY; + return getCopyOpcode(); if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { @@ -1283,7 +1294,7 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } - return AMDGPU::COPY; + return getCopyOpcode(); } const MCInstrDesc & @@ -1541,10 +1552,33 @@ static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI) { } } +static unsigned getWWMRegSpillSaveOpcode(unsigned Size) { + // Currently, there is only 32-bit WWM register spills needed. + if (Size != 4) + llvm_unreachable("unknown wwm register spill size"); + + return AMDGPU::SI_SPILL_WWM_V32_SAVE; +} + +static unsigned +getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, + unsigned Size, const SIRegisterInfo &TRI, + const SIMachineFunctionInfo &MFI, bool NeedsCFI) { + // Choose the right opcode if spilling a WWM register. + if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) + return getWWMRegSpillSaveOpcode(Size); + + if (TRI.isVectorSuperClass(RC)) + return getAVSpillSaveOpcode(Size, NeedsCFI); + + return TRI.isAGPRClass(RC) ? 
getAGPRSpillSaveOpcode(Size, NeedsCFI) + : getVGPRSpillSaveOpcode(Size, NeedsCFI); +} + void SIInstrInfo::storeRegToStackSlotImpl( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, bool NeedsCFI) const { + const TargetRegisterInfo *TRI, Register VReg, bool NeedsCFI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); @@ -1586,9 +1620,8 @@ void SIInstrInfo::storeRegToStackSlotImpl( return; } - unsigned Opcode = RI.isVectorSuperClass(RC) ? getAVSpillSaveOpcode(SpillSize, NeedsCFI) - : RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize, NeedsCFI) - : getVGPRSpillSaveOpcode(SpillSize, NeedsCFI); + unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, + SpillSize, RI, *MFI, NeedsCFI); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) @@ -1599,13 +1632,12 @@ void SIInstrInfo::storeRegToStackSlotImpl( .addMemOperand(MMO); } -void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, false); +void SIInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, VReg, + false); } void SIInstrInfo::storeRegToStackSlotCFI(MachineBasicBlock &MBB, @@ -1614,7 +1646,8 @@ void SIInstrInfo::storeRegToStackSlotCFI(MachineBasicBlock &MBB, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, true); + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, + Register(), true); } static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { @@ -1725,11 +1758,35 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) { } } +static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) { + // Currently, there is only 32-bit WWM register spills needed. + if (Size != 4) + llvm_unreachable("unknown wwm register spill size"); + + return AMDGPU::SI_SPILL_WWM_V32_RESTORE; +} + +static unsigned +getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, + unsigned Size, const SIRegisterInfo &TRI, + const SIMachineFunctionInfo &MFI) { + // Choose the right opcode if restoring a WWM register. + if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)) + return getWWMRegSpillRestoreOpcode(Size); + + if (TRI.isVectorSuperClass(RC)) + return getAVSpillRestoreOpcode(Size); + + return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size) + : getVGPRSpillRestoreOpcode(Size); +} + void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); @@ -1767,10 +1824,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, return; } - unsigned Opcode = RI.isVectorSuperClass(RC) - ? 
getAVSpillRestoreOpcode(SpillSize) - : RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize) - : getVGPRSpillRestoreOpcode(SpillSize); + unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC, + SpillSize, RI, *MFI); BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset @@ -2938,6 +2993,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_ACCVGPR_WRITE_B32_e64: case AMDGPU::V_ACCVGPR_READ_B32_e64: case AMDGPU::V_ACCVGPR_MOV_B32: @@ -2987,7 +3043,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, return false; unsigned Opc = UseMI.getOpcode(); - if (Opc == AMDGPU::COPY) { + if (UseMI.isCopy()) { Register DstReg = UseMI.getOperand(0).getReg(); bool Is16Bit = getOpSize(UseMI, 0) == 2; bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); @@ -4765,6 +4821,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { default: return AMDGPU::INSTRUCTION_LIST_END; case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; + case AMDGPU::PRED_COPY: + return AMDGPU::PRED_COPY; case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; @@ -4773,9 +4831,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; case AMDGPU::S_MOV_B32: { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - return MI.getOperand(1).isReg() || - RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? - AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; + return MI.getOperand(1).isReg() || RI.isAGPR(MRI, MI.getOperand(0).getReg()) + ? getCopyOpcode() + : AMDGPU::V_MOV_B32_e32; } case AMDGPU::S_ADD_I32: return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32; @@ -4841,6 +4899,53 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { "Unexpected scalar opcode without corresponding vector one!"); } +void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + bool IsSCCLive, + SlotIndexes *Indexes) const { + const GCNSubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + bool IsWave32 = ST.isWave32(); + if (IsSCCLive) { + // Insert two move instructions, one to save the original value of EXEC and + // the other to turn on all bits in EXEC. This is required as we can't use + // the single instruction S_OR_SAVEEXEC that clobbers SCC. + unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg) + .addReg(Exec, RegState::Kill); + auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); + if (Indexes) { + Indexes->insertMachineInstrInMaps(*StoreExecMI); + Indexes->insertMachineInstrInMaps(*FlipExecMI); + } + } else { + const unsigned OrSaveExec = + IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; + auto SaveExec = + BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1); + SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. 
+ if (Indexes) + Indexes->insertMachineInstrInMaps(*SaveExec); + } +} + +void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + SlotIndexes *Indexes) const { + const GCNSubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; + MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + auto ExecRestoreMI = BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) + .addReg(Reg, RegState::Kill); + if (Indexes) + Indexes->insertMachineInstrInMaps(*ExecRestoreMI); +} + static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MachineRegisterInfo &MRI, @@ -4935,7 +5040,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { unsigned Size = RI.getRegSizeInBits(*RC); unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32; if (MO.isReg()) - Opcode = AMDGPU::COPY; + Opcode = getCopyOpcode(); else if (RI.isSGPRClass(RC)) Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; @@ -4964,8 +5069,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, Register SubReg = MRI.createVirtualRegister(SubRC); if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { - BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) - .addReg(SuperReg.getReg(), 0, SubIdx); + buildCopy(*MBB, MI, DL, SubReg, SuperReg.getReg(), 0, SubIdx); return SubReg; } @@ -4975,11 +5079,10 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, // eliminate this extra copy. Register NewSuperReg = MRI.createVirtualRegister(SuperRC); - BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) - .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); + buildCopy(*MBB, MI, DL, NewSuperReg, SuperReg.getReg(), 0, + SuperReg.getSubReg()); - BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) - .addReg(NewSuperReg, 0, SubIdx); + buildCopy(*MBB, MI, DL, SubReg, NewSuperReg, 0, SubIdx); return SubReg; } @@ -5353,9 +5456,8 @@ Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, if (RI.hasAGPRs(VRC)) { VRC = RI.getEquivalentVGPRClass(VRC); Register NewSrcReg = MRI.createVirtualRegister(VRC); - BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), - get(TargetOpcode::COPY), NewSrcReg) - .addReg(SrcReg); + buildCopy(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), NewSrcReg, + SrcReg); SrcReg = NewSrcReg; } @@ -5520,7 +5622,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; Register DstReg = MRI.createVirtualRegister(DstRC); - auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); + auto Copy = BuildMI(InsertMBB, I, DL, get(getCopyOpcode()), DstReg).add(Op); Op.setReg(DstReg); Op.setSubReg(0); @@ -6321,8 +6423,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, Register CarryInReg = Inst.getOperand(4).getReg(); if (!MRI.constrainRegClass(CarryInReg, CarryRC)) { Register NewCarryReg = MRI.createVirtualRegister(CarryRC); - BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg) - .addReg(CarryInReg); + buildCopy(*MBB, &Inst, Inst.getDebugLoc(), NewCarryReg, CarryInReg); } Register CarryOutReg = Inst.getOperand(1).getReg(); @@ -6596,8 +6697,7 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst, if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) 
{ if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) { - BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC) - .addReg(CandI.getOperand(1).getReg()); + buildCopy(MBB, MII, DL, CopySCC, CandI.getOperand(1).getReg()); CopyFound = true; } break; @@ -7134,6 +7234,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( switch (UseMI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::WQM: case AMDGPU::SOFT_WQM: case AMDGPU::STRICT_WWM: @@ -7306,6 +7407,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( // class associated with the operand, so we need to find an equivalent VGPR // register class in order to move the instruction to the VALU. case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: @@ -7765,7 +7867,7 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { } bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { - return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && + return !MI.isTerminator() && !MI.isCopy() && MI.modifiesRegister(AMDGPU::EXEC, &RI); } @@ -8113,6 +8215,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, DefInst = nullptr; switch (MI->getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::V_MOV_B32_e32: { auto &Op1 = MI->getOperand(1); if (Op1.isReg() && Op1.getReg().isVirtual()) { @@ -8232,7 +8335,7 @@ MachineInstr *SIInstrInfo::createPHIDestinationCopy( if (Cur != MBB.end()) do { if (!Cur->isPHI() && Cur->readsRegister(Dst)) - return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); + return buildCopy(MBB, Cur, DL, Dst, Src); ++Cur; } while (Cur != MBB.end() && Cur != LastPHIIt); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 8e4ed773a1ac9..4e1ba1bcf9674 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -231,7 +231,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, + const TargetRegisterInfo *TRI, Register VReg, bool NeedsCFI) const; public: @@ -239,7 +239,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void storeRegToStackSlotCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, @@ -250,7 +251,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; @@ -264,7 +266,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { // Returns an opcode that can be used to move a value to a \p DstRC // register. If there is no hardware instruction that can store to \p - // DstRC, then AMDGPU::COPY is returned. + // DstRC, then getCopyOpcode() is returned. 
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, @@ -642,6 +644,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill; } + static bool isWWMRegSpillOpcode(uint16_t Opcode) { + return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || + Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE; + } + static bool isDPP(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::DPP; } @@ -923,6 +930,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { unsigned getVALUOp(const MachineInstr &MI) const; + void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, bool IsSCCLive, + SlotIndexes *Indexes = nullptr) const; + + void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + Register Reg, SlotIndexes *Indexes = nullptr) const; + /// Return the correct register class for \p OpNo. For target-specific /// instructions, this will return the register class that has been defined /// in tablegen. For generic instructions, like REG_SEQUENCE it will return diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index dec277078bba7..c422b39a96cef 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -874,6 +874,8 @@ defm SI_SPILL_AV256 : SI_SPILL_VGPR ; defm SI_SPILL_AV512 : SI_SPILL_VGPR ; defm SI_SPILL_AV1024 : SI_SPILL_VGPR ; +defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR ; + def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < (outs SReg_64:$dst), (ins si_ga:$ptr_lo, si_ga:$ptr_hi), diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 79f2826aa5cec..f140c399c1699 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1213,7 +1213,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired, (void)Read2; - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); // Copy to the old destination registers. BuildMI(*MBB, InsertBefore, DL, CopyDesc) @@ -1345,7 +1345,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1394,7 +1394,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst); @@ -1450,7 +1450,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. 
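A recurring change in the hunks above and below is that hard-coded AMDGPU::COPY / TargetOpcode::COPY opcodes are routed through SIInstrInfo, and opcode comparisons become MI.isCopy(), so the alternative PRED_COPY opcode added to the switches in this patch is handled uniformly. getCopyOpcode() and buildCopy() are not defined in this excerpt; the sketch below only illustrates the before/after shape, assuming TII, MBB, MI (an insertion iterator), DL, DstReg and SrcReg are already in scope.

  // Before: the copy opcode is spelled out at every call site.
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
      .addReg(SrcReg);

  // After: SIInstrInfo decides between COPY and PRED_COPY.
  TII->buildCopy(MBB, MI, DL, DstReg, SrcReg);

  // When only an MCInstrDesc is needed (as in SILoadStoreOptimizer):
  const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode());

  // Opcode checks are likewise generalized:
  if (MI->isCopy()) {
    // handles COPY and PRED_COPY alike
  }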
- const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1510,7 +1510,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); @@ -1609,7 +1609,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const MCInstrDesc &CopyDesc = TII->get(TII->getCopyOpcode()); const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst); const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 67077a2eaa6bf..a858d96b1ff7f 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -232,10 +232,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { // will interfere with trying to form s_and_saveexec_b64 later. Register CopyReg = SimpleIf ? SaveExecReg : MRI->createVirtualRegister(BoolRC); - MachineInstr *CopyExec = - BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg) - .addReg(Exec) - .addReg(Exec, RegState::ImplicitDefine); + MachineInstrBuilder CopyExec = MachineInstrBuilder( + *MBB.getParent(), TII->buildCopy(MBB, I, DL, CopyReg, Exec)); + CopyExec.addReg(Exec, RegState::ImplicitDefine); LoweredIf.insert(CopyReg); Register Tmp = MRI->createVirtualRegister(BoolRC); diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index d4f0906f020ab..1652bb8ac4389 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -492,7 +492,7 @@ bool SILowerI1Copies::lowerCopiesFromI1() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { - if (MI.getOpcode() != AMDGPU::COPY) + if (!MI.isCopy()) continue; Register DstReg = MI.getOperand(0).getReg(); @@ -571,7 +571,7 @@ bool SILowerI1Copies::lowerPhis() { MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB(); MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg); - if (IncomingDef->getOpcode() == AMDGPU::COPY) { + if (IncomingDef->isCopy()) { IncomingReg = IncomingDef->getOperand(1).getReg(); assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg)); assert(!IncomingDef->getOperand(1).getSubReg()); @@ -674,8 +674,7 @@ bool SILowerI1Copies::lowerCopiesToI1() { LF.initialize(MBB); for (MachineInstr &MI : MBB) { - if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF && - MI.getOpcode() != AMDGPU::COPY) + if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF && !MI.isCopy()) continue; Register DstReg = MI.getOperand(0).getReg(); @@ -744,7 +743,7 @@ bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const { if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF) return true; - if (MI->getOpcode() != AMDGPU::COPY) + if (!MI->isCopy()) break; Reg = MI->getOperand(1).getReg(); @@ -827,9 +826,9 @@ void 
SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB, if (PrevConstant && CurConstant) { if (PrevVal == CurVal) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg); + TII->buildCopy(MBB, I, DL, DstReg, CurReg); } else if (CurVal) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg); + TII->buildCopy(MBB, I, DL, DstReg, ExecReg); } else { BuildMI(MBB, I, DL, TII->get(XorOp), DstReg) .addReg(ExecReg) @@ -863,11 +862,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB, } if (PrevConstant && !PrevVal) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg) - .addReg(CurMaskedReg); + TII->buildCopy(MBB, I, DL, DstReg, CurMaskedReg); } else if (CurConstant && !CurVal) { - BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg) - .addReg(PrevMaskedReg); + TII->buildCopy(MBB, I, DL, DstReg, PrevMaskedReg); } else if (PrevConstant && PrevVal) { BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg) .addReg(CurMaskedReg) diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 0cdb1ef6c3237..72316e1e0452f 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -52,7 +52,9 @@ class SILowerSGPRSpills : public MachineFunctionPass { SILowerSGPRSpills() : MachineFunctionPass(ID) {} void calculateSaveRestoreBlocks(MachineFunction &MF); - bool spillCalleeSavedRegs(MachineFunction &MF); + bool spillCalleeSavedRegs(MachineFunction &MF, + SmallVectorImpl &CalleeSavedFIs); + void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS); bool runOnMachineFunction(MachineFunction &MF) override; @@ -60,6 +62,12 @@ class SILowerSGPRSpills : public MachineFunctionPass { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA) + .set(MachineFunctionProperties::Property::NoVRegs); + } }; } // end anonymous namespace @@ -115,7 +123,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); - TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI, + Register()); assert(I != RestoreBlock.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. 
loadRegFromStackSlot can insert @@ -174,7 +183,8 @@ static void updateLiveness(MachineFunction &MF, ArrayRef CSI) { EntryBB.sortUniqueLiveIns(); } -bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { +bool SILowerSGPRSpills::spillCalleeSavedRegs( + MachineFunction &MF, SmallVectorImpl &CalleeSavedFIs) { MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -213,6 +223,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { TRI->getSpillAlign(*RC), true); CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); + CalleeSavedFIs.push_back(JunkFI); } } @@ -225,6 +236,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), TRI->getSpillAlign(*RC), true); CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI)); + CalleeSavedFIs.push_back(JunkFI); } if (!CSI.empty()) { @@ -244,6 +256,50 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { return false; } +void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF, + LiveIntervals *LIS) { + // TODO: This is a workaround to avoid the unmodelled liveness computed with + // whole-wave virtual registers when allocated together with the regular VGPR + // virtual registers. Presently, the liveness computed during the regalloc is + // only uniform (or single lane aware) and it doesn't take account of the + // divergent control flow that exists for our GPUs. Since the WWM registers + // can modify inactive lanes, the wave-aware liveness should be computed for + // the virtual registers to accurately plot their interferences. Without + // having the divergent CFG for the function, it is difficult to implement the + // wave-aware liveness info. Until then, we conservatively extend the liveness + // of the wwm registers into the entire function so that they won't be reused + // without first spilling/splitting their liveranges. + SIMachineFunctionInfo *MFI = MF.getInfo(); + + // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks. + for (auto Reg : MFI->getSGPRSpillVGPRs()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + MachineBasicBlock::iterator InsertBefore = SaveBlock->begin(); + auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(), + TII->get(AMDGPU::IMPLICIT_DEF), Reg); + MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); + if (LIS) { + LIS->InsertMachineInstrInMaps(*MIB); + } + } + } + + // Insert the KILL in the return blocks to extend their liveness untill the + // end of function. Insert a separate KILL for each VGPR. + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { + MachineBasicBlock::iterator InsertBefore = + RestoreBlock->getFirstTerminator(); + for (auto Reg : MFI->getSGPRSpillVGPRs()) { + auto MIB = + BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(), + TII->get(TargetOpcode::KILL)); + MIB.addReg(Reg); + if (LIS) + LIS->InsertMachineInstrInMaps(*MIB); + } + } +} + bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); @@ -257,7 +313,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // First, expose any CSR SGPR spills. This is mostly the same as what PEI // does, but somewhat simpler. 
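The long TODO comment above motivates extendWWMVirtRegLiveness: lacking wave-aware liveness, the pass pins every WWM spill VGPR live across the whole function. Stripped of the loops over the save and restore block lists, the pattern it emits per register is just a def at function entry and a read before every return; a condensed restatement, with Reg, SaveBlock and RestoreBlock as in the code above and the debug locations simplified:

  // Entry block: define Reg so its live range starts at the top of the
  // function.
  BuildMI(*SaveBlock, SaveBlock->begin(), DebugLoc(),
          TII->get(AMDGPU::IMPLICIT_DEF), Reg);

  // Each return block: read Reg right before the terminator so the live
  // range only ends at the end of the function and regalloc cannot reuse
  // the register without spilling or splitting it first.
  BuildMI(*RestoreBlock, RestoreBlock->getFirstTerminator(), DebugLoc(),
          TII->get(TargetOpcode::KILL))
      .addReg(Reg);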
calculateSaveRestoreBlocks(MF); - bool HasCSRs = spillCalleeSavedRegs(MF); + SmallVector CalleeSavedFIs; + bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -271,6 +328,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; bool NewReservedRegs = false; + bool SpilledToVirtVGPRLanes = false; // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be // handled as SpilledToReg in regular PrologEpilogInserter. @@ -293,23 +351,53 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); - if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { - NewReservedRegs = true; - bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( - MI, FI, nullptr, Indexes, LIS); - (void)Spilled; - assert(Spilled && "failed to spill SGPR to VGPR when allocated"); - SpillFIs.set(FI); + + bool IsCalleeSaveSGPRSpill = + std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) != + CalleeSavedFIs.end(); + if (IsCalleeSaveSGPRSpill) { + // Spill callee-saved SGPRs into physical VGPR lanes. + + // TODO: This is to ensure the CFIs are static for efficient frame + // unwinding in the debugger. Spilling them into virtual VGPR lanes + // involve regalloc to allocate the physical VGPRs and that might + // cause intermediate spill/split of such liveranges for successful + // allocation. This would result in broken CFI encoding unless the + // regalloc aware CFI generation to insert new CFIs along with the + // intermediate spills is implemented. There is no such support + // currently exist in the LLVM compiler. + if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) { + NewReservedRegs = true; + bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( + MI, FI, nullptr, Indexes, LIS, true); + if (!Spilled) + llvm_unreachable( + "failed to spill SGPR to physical VGPR lane when allocated"); + } + } else { + if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { + bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( + MI, FI, nullptr, Indexes, LIS); + if (!Spilled) + llvm_unreachable( + "failed to spill SGPR to virtual VGPR lane when allocated"); + SpillFIs.set(FI); + SpilledToVirtVGPRLanes = true; + } } } } - // FIXME: Adding to live-ins redundant with reserving registers. - for (MachineBasicBlock &MBB : MF) { - for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) - MBB.addLiveIn(SSpill.VGPR); - MBB.sortUniqueLiveIns(); + if (SpilledToVirtVGPRLanes) { + extendWWMVirtRegLiveness(MF, LIS); + if (LIS) { + // Compute the LiveInterval for the newly created virtual registers. + for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) + LIS->createAndComputeVirtRegInterval(Reg); + } + } + for (MachineBasicBlock &MBB : MF) { // FIXME: The dead frame indices are replaced with a null register from // the debug value instructions. We should instead, update it with the // correct register value. But not sure the register value alone is @@ -340,12 +428,31 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; } + if (SpilledToVirtVGPRLanes) { + const TargetRegisterClass *RC = + ST.isWave32() ? &AMDGPU::SGPR_32RegClass : &AMDGPU::SGPR_64RegClass; + // Shift back the reserved SGPR for EXEC copy into the lowest range. 
+ // This SGPR is reserved to handle the whole-wave spill/copy operations + // that might get inserted during vgpr regalloc. + Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF); + if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) < + TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy())) + FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR); + } else { + // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM + // spills/copies. Reset the SGPR reserved for EXEC copy. + FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister); + } + SaveBlocks.clear(); RestoreBlocks.clear(); - // Updated the reserved registers with any VGPRs added for SGPR spills. - if (NewReservedRegs) - MRI.freezeReservedRegs(MF); + // Updated the reserved registers with any physical VGPRs added for SGPR + // spills. + if (NewReservedRegs) { + for (Register Reg : FuncInfo->getWWMReservedRegs()) + MRI.reserveReg(Reg, TRI); + } return MadeChange; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 7ed12b6bca075..73f01af423986 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -31,12 +31,16 @@ using namespace llvm; -SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : AMDGPUMachineFunction(MF), - Mode(MF.getFunction()), - BufferPSV(static_cast(MF.getTarget())), - ImagePSV(static_cast(MF.getTarget())), - GWSResourcePSV(static_cast(MF.getTarget())), +const GCNTargetMachine &getTM(const GCNSubtarget *STI) { + const SITargetLowering *TLI = STI->getTargetLowering(); + return static_cast(TLI->getTargetMachine()); +} + +SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, + const GCNSubtarget *STI) + : AMDGPUMachineFunction(F, *STI), + Mode(F), + GWSResourcePSV(getTM(STI)), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -54,16 +58,19 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDZ(false), ImplicitBufferPtr(false), ImplicitArgPtr(false), + HostcallPtr(false), + HeapPtr(false), GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { - const GCNSubtarget &ST = MF.getSubtarget(); - const Function &F = MF.getFunction(); + const GCNSubtarget &ST = *static_cast(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); Occupancy = ST.computeOccupancy(F, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); + VRegFlags.reserve(1024); + // FIXME: Should have analysis or something rather than attribute to detect // calls. const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); @@ -108,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (ST.hasGFX90AInsts() && ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(MF)) + !mayUseAGPRs(F)) MayNeedAGPRs = false; // We will select all MAI with VGPR operands. 
} @@ -148,8 +155,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) DispatchID = true; - if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) - LDSKernelId = true; + if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr")) + HostcallPtr = true; + + if (!F.hasFnAttribute("amdgpu-no-heap-ptr")) + HeapPtr = true; } // FIXME: This attribute is a hack, we just need an analysis on the function @@ -274,8 +284,32 @@ Register SIMachineFunctionInfo::addLDSKernelId() { return ArgInfo.LDSKernelId.getRegister(); } +void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR, + uint64_t Size, Align Alignment) { + // Skip if it is an entry function or the register is already added. + if (isEntryFunction() || WWMSpills.count(VGPR)) + return; + + WWMSpills.insert(std::make_pair( + VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment))); +} + +// Separate out the callee-saved and scratch registers. +void SIMachineFunctionInfo::splitWWMSpillRegisters( + MachineFunction &MF, + SmallVectorImpl> &CalleeSavedRegs, + SmallVectorImpl> &ScratchRegs) const { + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + for (auto &Reg : WWMSpills) { + if (isCalleeSavedReg(CSRegs, Reg.first)) + CalleeSavedRegs.push_back(Reg); + else + ScratchRegs.push_back(Reg); + } +} + bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, - MCPhysReg Reg) { + MCPhysReg Reg) const { for (unsigned I = 0; CSRegs[I]; ++I) { if (CSRegs[I] == Reg) return true; @@ -284,21 +318,58 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, return false; } -/// \p returns true if \p NumLanes slots are available in VGPRs already used for -/// SGPR spilling. -// -// FIXME: This only works after processFunctionBeforeFrameFinalized -bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, - unsigned NumNeed) const { +bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills( + MachineFunction &MF, int FI, unsigned LaneIndex) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LaneVGPR; + if (!LaneIndex) { + LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + SpillVGPRs.push_back(LaneVGPR); + } else { + LaneVGPR = SpillVGPRs.back(); + } + + SGPRSpillsToVirtualVGPRLanes[FI].push_back( + SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); + return true; +} + +bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills( + MachineFunction &MF, int FI, unsigned LaneIndex) { const GCNSubtarget &ST = MF.getSubtarget(); - unsigned WaveSize = ST.getWavefrontSize(); - return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LaneVGPR; + if (!LaneIndex) { + LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (LaneVGPR == AMDGPU::NoRegister) { + // We have no VGPRs left for spilling SGPRs. Reset because we will not + // partially spill the SGPR to VGPRs. + SGPRSpillsToPhysicalVGPRLanes.erase(FI); + return false; + } + + allocateWWMSpill(MF, LaneVGPR); + reserveWWMRegister(LaneVGPR); + for (MachineBasicBlock &MBB : MF) { + MBB.addLiveIn(LaneVGPR); + MBB.sortUniqueLiveIns(); + } + } else { + LaneVGPR = WWMReservedRegs.back(); + } + + SGPRSpillsToPhysicalVGPRLanes[FI].push_back( + SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); + return true; } -/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. 
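allocateWWMSpill and splitWWMSpillRegisters above are the bookkeeping half of the WWM spill handling; a hypothetical caller in frame lowering (not shown in this excerpt) would use them roughly as follows, assuming MF, MFI and a whole-wave-written VGPR are in scope and llvm::SmallVector is available:

  // Create (at most once per register) a 4-byte spill slot for the WWM VGPR;
  // entry functions are skipped because their inactive lanes need no saving.
  MFI->allocateWWMSpill(MF, VGPR);

  // Partition the recorded {VGPR, frame index} pairs so callee-saved and
  // scratch registers can be saved/restored by different code paths.
  SmallVector<std::pair<Register, int>, 2> CalleeSavedRegs, ScratchRegs;
  MFI->splitWWMSpillRegisters(MF, CalleeSavedRegs, ScratchRegs);

  for (const auto &[Reg, FrameIdx] : CalleeSavedRegs) {
    // emit the CSR-style save of Reg into FrameIdx ...
  }
  for (const auto &[Reg, FrameIdx] : ScratchRegs) {
    // emit the scratch-register save of Reg into FrameIdx ...
  }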
-bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, - int FI) { - std::vector &SpillLanes = SGPRToVGPRSpills[FI]; +bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF, + int FI, + bool IsPrologEpilog) { + std::vector &SpillLanes = + IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI] + : SGPRSpillsToVirtualVGPRLanes[FI]; // This has already been allocated. if (!SpillLanes.empty()) @@ -307,7 +378,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned WaveSize = ST.getWavefrontSize(); unsigned Size = FrameInfo.getObjectSize(FI); @@ -319,49 +389,19 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, assert(Size >= 4 && "invalid sgpr spill size"); assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs"); - // Make sure to handle the case where a wide SGPR spill may span between two - // VGPRs. - for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { - Register LaneVGPR; - unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize); - - if (VGPRIndex == 0) { - LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); - if (LaneVGPR == AMDGPU::NoRegister) { - // We have no VGPRs left for spilling SGPRs. Reset because we will not - // partially spill the SGPR to VGPRs. - SGPRToVGPRSpills.erase(FI); - NumVGPRSpillLanes -= I; - - // FIXME: We can run out of free registers with split allocation if - // IPRA is enabled and a called function already uses every VGPR. -#if 0 - DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(), - "VGPRs for SGPR spilling", - 0, DS_Error); - MF.getFunction().getContext().diagnose(DiagOutOfRegs); -#endif - return false; - } - - Optional SpillFI; - // We need to preserve inactive lanes, so always save, even caller-save - // registers. - if (!isEntryFunction()) { - SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4)); - } + unsigned &NumSpillLanes = + IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes; - SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI)); + for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) { + unsigned LaneIndex = (NumSpillLanes % WaveSize); - // Add this register as live-in to all blocks to avoid machine verifier - // complaining about use of an undefined physical register. - for (MachineBasicBlock &BB : MF) - BB.addLiveIn(LaneVGPR); - } else { - LaneVGPR = SpillVGPRs.back().VGPR; + bool Allocated = IsPrologEpilog + ? 
allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex) + : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex); + if (!Allocated) { + NumSpillLanes -= I; + return false; } - - SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex)); } return true; @@ -429,6 +469,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, OtherUsedRegs.set(*NextSpillReg); SpillRegs.push_back(*NextSpillReg); + MRI.reserveReg(*NextSpillReg, TRI); Spill.Lanes[I] = *NextSpillReg++; } @@ -438,34 +479,38 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, bool SIMachineFunctionInfo::removeDeadFrameIndices( MachineFunction &MF, bool ResetSGPRSpillStackIDs) { MachineFrameInfo &MFI = MF.getFrameInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); // Remove dead frame indices from function frame, however keep FP & BP since // spills for them haven't been inserted yet. And also make sure to remove the - // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could - // result in an unexpected side effect and bug, in case of any re-mapping of - // freed frame indices by later pass(es) like "stack slot coloring". - for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) { - if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex && - (!TRI->isCFISavedRegsSpillEnabled() || R.first != EXECSaveIndex)) { + // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure, + // otherwise, it could result in an unexpected side effect and bug, in case of + // any re-mapping of freed frame indices by later pass(es) like "stack slot + // coloring". + for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) { + MFI.RemoveStackObject(R.first); + SGPRSpillsToVirtualVGPRLanes.erase(R.first); + } + + // Remove the dead frame indices of CSR SGPRs which are spilled to physical + // VGPR lanes during SILowerSGPRSpills pass. + if (!ResetSGPRSpillStackIDs) { + for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) { MFI.RemoveStackObject(R.first); - SGPRToVGPRSpills.erase(R.first); + SGPRSpillsToPhysicalVGPRLanes.erase(R.first); } } - bool HaveSGPRToMemory = false; if (ResetSGPRSpillStackIDs) { - // All other SPGRs must be allocated on the default stack, so reset the + // All other SGPRs must be allocated on the default stack, so reset the // stack ID. 
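The loop in allocateSGPRSpillToVGPRLane above keeps one running NumSpillLanes counter per lane kind, so a wide SGPR spill can straddle two VGPRs: a fresh VGPR is requested whenever the lane index wraps past the wavefront size. A standalone illustration of that index math (plain C++, not part of the patch; the numbers are made up):

  #include <cstdio>

  int main() {
    const unsigned WaveSize = 64; // wave64; wave32 would use 32 lanes
    unsigned NumSpillLanes = 60;  // lanes already used by earlier spills
    const unsigned NumLanes = 8;  // spilling an 8-dword SGPR tuple

    for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
      unsigned LaneIndex = NumSpillLanes % WaveSize;
      bool NeedsNewVGPR = (LaneIndex == 0);
      std::printf("dword %u -> spill VGPR #%u, lane %u%s\n", I,
                  NumSpillLanes / WaveSize, LaneIndex,
                  NeedsNewVGPR ? " (allocate a fresh VGPR)" : "");
    }
    return 0;
  }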
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) { - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex && - (!TRI->isCFISavedRegsSpillEnabled() || i != EXECSaveIndex)) - if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { - MFI.setStackID(i, TargetStackID::Default); + for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; + ++I) { + if (!checkIndexInPrologEpilogSGPRSpills(I)) { + if (MFI.getStackID(I) == TargetStackID::SGPRSpill) { + MFI.setStackID(I, TargetStackID::Default); HaveSGPRToMemory = true; } + } } } @@ -477,20 +522,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices( return HaveSGPRToMemory; } -void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( - MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { - assert(WWMReservedFrameIndexes.empty()); - - WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); - - int I = 0; - for (Register VGPR : WWMReservedRegs) { - const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); - WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( - TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); - } -} - int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { if (ScavengeFI) @@ -515,6 +546,16 @@ MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; } +void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) { + VRegFlags.grow(Reg); +} + +void SIMachineFunctionInfo::MRI_NotecloneVirtualRegister(Register NewReg, + Register SrcReg) { + VRegFlags.grow(NewReg); + VRegFlags[NewReg] = VRegFlags[SrcReg]; +} + Register SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const { const GCNSubtarget &ST = MF.getSubtarget(); @@ -617,11 +658,15 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { - for (Register Reg : MFI.WWMReservedRegs) + for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); if (MFI.getVGPRForAGPRCopy()) VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI); + + if (MFI.getSGPRForEXECCopy()) + SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI); + auto SFI = MFI.getOptionalScavengeFI(); if (SFI) ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo()); @@ -670,8 +715,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( return false; } -bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { - for (const BasicBlock &BB : MF.getFunction()) { +bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { const auto *CB = dyn_cast(&I); if (!CB) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 0410f0b7d6056..dd9151f9f2c4f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -36,8 +36,7 @@ class TargetRegisterClass; class AMDGPUPseudoSourceValue : public PseudoSourceValue { public: enum AMDGPUPSVKind : unsigned { - PSVBuffer = PseudoSourceValue::TargetCustom, - PSVImage, + PSVImage = PseudoSourceValue::TargetCustom, GWSResource }; @@ -61,31 +60,6 @@ class AMDGPUPseudoSourceValue : public PseudoSourceValue { } }; -class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue { -public: - explicit 
AMDGPUBufferPseudoSourceValue(const AMDGPUTargetMachine &TM) - : AMDGPUPseudoSourceValue(PSVBuffer, TM) {} - - static bool classof(const PseudoSourceValue *V) { - return V->kind() == PSVBuffer; - } - - void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; } -}; - -class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue { -public: - // TODO: Is the img rsrc useful? - explicit AMDGPUImagePseudoSourceValue(const AMDGPUTargetMachine &TM) - : AMDGPUPseudoSourceValue(PSVImage, TM) {} - - static bool classof(const PseudoSourceValue *V) { - return V->kind() == PSVImage; - } - - void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; } -}; - class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue { public: explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM) @@ -302,6 +276,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { SIMode Mode; Optional ScavengeFI; StringValue VGPRForAGPRCopy; + StringValue SGPRForEXECCopy; SIMachineFunctionInfo() = default; SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, @@ -343,14 +318,46 @@ template <> struct MappingTraits { YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI); YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy, StringValue()); // Don't print out when it's empty. + YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy, + StringValue()); // Don't print out when it's empty. } }; } // end namespace yaml +// A CSR SGPR value can be preserved inside a callee using one of the following +// methods. +// 1. Copy to an unused scratch SGPR. +// 2. Spill to a VGPR lane. +// 3. Spill to memory via. a scratch VGPR. +// class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used +// for an SGPR at function prolog/epilog. +enum class SGPRSaveKind : uint8_t { + COPY_TO_SCRATCH_SGPR, + SPILL_TO_VGPR_LANE, + SPILL_TO_MEM +}; + +class PrologEpilogSGPRSaveRestoreInfo { + SGPRSaveKind Kind; + union { + int Index; + Register Reg; + }; + +public: + PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {} + PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R) + : Kind(K), Reg(R) {} + Register getReg() const { return Reg; } + int getIndex() const { return Index; } + SGPRSaveKind getKind() const { return Kind; } +}; + /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. -class SIMachineFunctionInfo final : public AMDGPUMachineFunction { +class SIMachineFunctionInfo final : public AMDGPUMachineFunction, + private MachineRegisterInfo::Delegate { friend class GCNTargetMachine; // State of MODE register, assumed FP mode. @@ -396,8 +403,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // unit. Minimum - first, maximum - second. std::pair WavesPerEU = {0, 0}; - const AMDGPUBufferPseudoSourceValue BufferPSV; - const AMDGPUImagePseudoSourceValue ImagePSV; const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; private: @@ -440,6 +445,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // Pointer to where the ABI inserts special kernel arguments separate from the // user arguments. This is an offset from the KernargSegmentPtr. 
bool ImplicitArgPtr : 1; + bool HostcallPtr : 1; + bool HeapPtr : 1; bool MayNeedAGPRs : 1; @@ -450,6 +457,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned HighBitsOf32BitAddress; + // Flags associated with the virtual registers. + IndexedMap VRegFlags; + // Current recorded maximum possible occupancy. unsigned Occupancy; @@ -459,48 +469,55 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { MCPhysReg getNextSystemSGPR() const; -public: - struct SGPRSpillVGPR { - // VGPR used for SGPR spills - Register VGPR; - - // If the VGPR is is used for SGPR spills in a non-entrypoint function, the - // stack slot used to save/restore it in the prolog/epilog. - Optional FI; - - SGPRSpillVGPR(Register V, Optional F) : VGPR(V), FI(F) {} - }; + // MachineRegisterInfo callback functions to notify events. + void MRI_NoteNewVirtualRegister(Register Reg) override; + void MRI_NotecloneVirtualRegister(Register NewReg, Register SrcReg) override; +public: struct VGPRSpillToAGPR { SmallVector Lanes; bool FullyAllocated = false; bool IsDead = false; }; - // Track VGPRs reserved for WWM. - SmallSetVector WWMReservedRegs; - - /// Track stack slots used for save/restore of reserved WWM VGPRs in the - /// prolog/epilog. - - /// FIXME: This is temporary state only needed in PrologEpilogInserter, and - /// doesn't really belong here. It does not require serialization - SmallVector WWMReservedFrameIndexes; - - void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, - const SIRegisterInfo &TRI); - - auto wwmAllocation() const { - assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size()); - return zip(WWMReservedRegs, WWMReservedFrameIndexes); - } - private: - // Track VGPR + wave index for each subregister of the SGPR spilled to - // frameindex key. - DenseMap> SGPRToVGPRSpills; - unsigned NumVGPRSpillLanes = 0; - SmallVector SpillVGPRs; + // To track virtual VGPR + lane index for each subregister of the SGPR spilled + // to frameindex key during SILowerSGPRSpills pass. + DenseMap> + SGPRSpillsToVirtualVGPRLanes; + // To track physical VGPR + lane index for CSR SGPR spills and special SGPRs + // like Frame Pointer identified during PrologEpilogInserter. + DenseMap> + SGPRSpillsToPhysicalVGPRLanes; + unsigned NumVirtualVGPRSpillLanes = 0; + unsigned NumPhysicalVGPRSpillLanes = 0; + SmallVector SpillVGPRs; + using WWMSpillsMap = MapVector; + // To track the registers used in instructions that can potentially modify the + // inactive lanes. The WWM instructions and the writelane instructions for + // spilling SGPRs to VGPRs fall under such category of operations. The VGPRs + // modified by them should be spilled/restored at function prolog/epilog to + // avoid any undesired outcome. Each entry in this map holds a pair of values, + // the VGPR and its stack slot index. + WWMSpillsMap WWMSpills; + + using ReservedRegSet = SmallSetVector; + // To track the VGPRs reserved for WWM instructions. They get stack slots + // later during PrologEpilogInserter and get added into the superset WWMSpills + // for actual spilling. A separate set makes the register reserved part and + // the serialization easier. + ReservedRegSet WWMReservedRegs; + + using PrologEpilogSGPRSpillsMap = + DenseMap; + // To track the SGPR spill method used for a CSR SGPR register during + // frame lowering. Even though the SGPR spills are handled during + // SILowerSGPRSpills pass, some special handling needed later during the + // PrologEpilogInserter. 
+ PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills; + + // To save/restore EXEC MASK around WWM spills and copies. + Register SGPRForEXECCopy; DenseMap VGPRToAGPRSpills; @@ -517,6 +534,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { private: Register VGPRForAGPRCopy; + bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI, + unsigned LaneIndex); + bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI, + unsigned LaneIndex); + public: Register getVGPRForAGPRCopy() const { return VGPRForAGPRCopy; @@ -526,24 +548,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { VGPRForAGPRCopy = NewVGPRForAGPRCopy; } -public: // FIXME - /// If this is set, an SGPR used for save/restore of the register used for the - /// frame pointer. - Register SGPRForFPSaveRestoreCopy; - Optional FramePointerSaveIndex; - - /// If this is set, an SGPR used for save/restore of the register used for the - /// base pointer. - Register SGPRForBPSaveRestoreCopy; - Optional BasePointerSaveIndex; - - Optional EXECSaveIndex; - - bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); + bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const; public: - SIMachineFunctionInfo(const MachineFunction &MF); SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default; + SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI); MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, @@ -555,28 +564,122 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange); - void reserveWWMRegister(Register Reg) { - WWMReservedRegs.insert(Reg); - } + void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } AMDGPU::SIModeRegisterDefaults getMode() const { return Mode; } ArrayRef - getSGPRToVGPRSpills(int FrameIndex) const { - auto I = SGPRToVGPRSpills.find(FrameIndex); - return (I == SGPRToVGPRSpills.end()) + getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const { + auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex); + return (I == SGPRSpillsToVirtualVGPRLanes.end()) + ? ArrayRef() + : makeArrayRef(I->second); + } + + ArrayRef getSGPRSpillVGPRs() const { return SpillVGPRs; } + const WWMSpillsMap &getWWMSpills() const { return WWMSpills; } + const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } + + const PrologEpilogSGPRSpillsMap &getPrologEpilogSGPRSpills() const { + return PrologEpilogSGPRSpills; + } + + void addToPrologEpilogSGPRSpills(Register Reg, + PrologEpilogSGPRSaveRestoreInfo SI) { + PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI)); + } + + // Check if an entry created for \p Reg in PrologEpilogSGPRSpills. Return true + // on success and false otherwise. + bool hasPrologEpilogSGPRSpillEntry(Register Reg) const { + return PrologEpilogSGPRSpills.find(Reg) != PrologEpilogSGPRSpills.end(); + } + + // Get the scratch SGPR if allocated to save/restore \p Reg. + Register getScratchSGPRCopyDstReg(Register Reg) const { + auto I = PrologEpilogSGPRSpills.find(Reg); + if (I != PrologEpilogSGPRSpills.end() && + I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) + return I->second.getReg(); + + return AMDGPU::NoRegister; + } + + // Get all scratch SGPRs allocated to copy/restore the SGPR spills. 
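PrologEpilogSGPRSaveRestoreInfo, defined a little earlier in this header, is a tagged union: the active member is the frame index for SPILL_TO_VGPR_LANE and SPILL_TO_MEM and the register for COPY_TO_SCRATCH_SGPR, and callers are expected to check getKind() before calling getReg() or getIndex(). A small usage sketch of the accessors around this point, with FramePtrReg, ReturnAddrReg, ScratchSGPR and FI as placeholder values:

  // Record how two CSR SGPRs are preserved.
  MFI->addToPrologEpilogSGPRSpills(
      FramePtrReg, PrologEpilogSGPRSaveRestoreInfo(
                       SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
  MFI->addToPrologEpilogSGPRSpills(
      ReturnAddrReg,
      PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

  // Later, dispatch on the kind when emitting the epilog restore.
  const PrologEpilogSGPRSaveRestoreInfo &SI =
      MFI->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg);
  switch (SI.getKind()) {
  case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
    // copy SI.getReg() back into FramePtrReg
    break;
  case SGPRSaveKind::SPILL_TO_VGPR_LANE:
  case SGPRSaveKind::SPILL_TO_MEM:
    // reload FramePtrReg from frame index SI.getIndex()
    break;
  }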
+ void getAllScratchSGPRCopyDstRegs(SmallVectorImpl &Regs) const { + for (const auto &SI : PrologEpilogSGPRSpills) { + if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) + Regs.push_back(SI.second.getReg()); + } + } + + // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI. + bool checkIndexInPrologEpilogSGPRSpills(int FI) const { + return find_if(PrologEpilogSGPRSpills, + [FI](const std::pair &SI) { + return SI.second.getKind() == + SGPRSaveKind::SPILL_TO_VGPR_LANE && + SI.second.getIndex() == FI; + }) != PrologEpilogSGPRSpills.end(); + } + + // Remove if an entry created for \p Reg. + void removePrologEpilogSGPRSpillEntry(Register Reg) { + auto I = PrologEpilogSGPRSpills.find(Reg); + if (I == PrologEpilogSGPRSpills.end()) + return; + + PrologEpilogSGPRSpills.erase(I); + } + + const PrologEpilogSGPRSaveRestoreInfo & + getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const { + auto I = PrologEpilogSGPRSpills.find(Reg); + assert(I != PrologEpilogSGPRSpills.end()); + + return I->second; + } + + ArrayRef + getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const { + auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex); + return (I == SGPRSpillsToPhysicalVGPRLanes.end()) ? ArrayRef() : makeArrayRef(I->second); } - ArrayRef getSGPRSpillVGPRs() const { return SpillVGPRs; } + void setFlag(Register Reg, uint8_t Flag) { + assert(Reg.isVirtual()); + if (VRegFlags.inBounds(Reg)) + VRegFlags[Reg] |= (uint8_t)1 << Flag; + } + + bool checkFlag(Register Reg, uint8_t Flag) const { + if (Reg.isPhysical()) + return false; + + return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & ((uint8_t)1 << Flag); + } + + void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4, + Align Alignment = Align(4)); + + void splitWWMSpillRegisters( + MachineFunction &MF, + SmallVectorImpl> &CalleeSavedRegs, + SmallVectorImpl> &ScratchRegs) const; ArrayRef getAGPRSpillVGPRs() const { return SpillAGPR; } + Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; } + + void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; } + ArrayRef getVGPRSpillAGPRs() const { return SpillVGPR; } @@ -593,9 +696,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { I->second.IsDead = true; } - bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, - unsigned NumLane) const; - bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); + bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, + bool IsPrologEpilog = false); bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill @@ -742,6 +844,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return ImplicitArgPtr; } + bool hasHostcallPtr() const { + return HostcallPtr; + } + + bool hasHeapPtr () const { + return HeapPtr; + } + bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; } @@ -951,16 +1061,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { llvm_unreachable("unexpected dimension"); } - const AMDGPUBufferPseudoSourceValue * - getBufferPSV(const AMDGPUTargetMachine &TM) { - return &BufferPSV; - } - - const AMDGPUImagePseudoSourceValue * - getImagePSV(const AMDGPUTargetMachine &TM) { - return &ImagePSV; - } - const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM) { return &GWSResourcePSV; @@ -995,7 +1095,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // \returns true if a function has a use of AGPRs 
via inline asm or // has a call which may use it. - bool mayUseAGPRs(const MachineFunction &MF) const; + bool mayUseAGPRs(const Function &F) const; // \returns true if a function needs or may need AGPRs. bool usesAGPRs(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index ff5587fbb0ca1..353470bcc4cc9 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1821,7 +1821,7 @@ void SIScheduleDAGMI::moveLowLatencies() { LastLowLatencyUser = i; // Moves COPY instructions on which depends // the low latency instructions too. - } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) { + } else if (SU->getInstr()->isCopy()) { bool CopyForLowLat = false; for (SDep& SuccDep : SU->Succs) { SUnit *Succ = SuccDep.getSUnit(); diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 04c9a6457944c..80f15c34183ce 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -93,6 +93,7 @@ char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID; Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B64_term: case AMDGPU::S_MOV_B32: @@ -110,6 +111,7 @@ Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::COPY: + case AMDGPU::PRED_COPY: case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B32: { const MachineOperand &Dst = MI.getOperand(0); @@ -211,12 +213,12 @@ bool SIOptimizeExecMasking::removeTerminatorBit(MachineInstr &MI) const { switch (MI.getOpcode()) { case AMDGPU::S_MOV_B32_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); + MI.setDesc(TII->get(RegSrc ? TII->getCopyOpcode() : AMDGPU::S_MOV_B32)); return true; } case AMDGPU::S_MOV_B64_term: { bool RegSrc = MI.getOperand(1).isReg(); - MI.setDesc(TII->get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64)); + MI.setDesc(TII->get(RegSrc ? 
TII->getCopyOpcode() : AMDGPU::S_MOV_B64)); return true; } case AMDGPU::S_XOR_B64_term: { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index e8b4849333bf6..04d38e1c758a4 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -546,11 +546,18 @@ bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const { return EnableSpillCFISavedRegs; } +MCRegister +SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF, + const unsigned Align, + const TargetRegisterClass *RC) const { + unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align; + MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); + return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC); +} + MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; - MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); - return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); + return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass); } BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -657,6 +664,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { assert(!isSubRegister(ScratchRSrcReg, BasePtrReg)); } + // SGPR used to preserve EXEC MASK around WWM spill/copy instructions. + Register ExecCopyReg = MFI->getSGPRForEXECCopy(); + if (ExecCopyReg) + reserveRegisterTuples(Reserved, ExecCopyReg); + // Reserve VGPRs/AGPRs. // unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); @@ -712,7 +724,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy()); } - for (Register Reg : MFI->WWMReservedRegs) + for (Register Reg : MFI->getWWMReservedRegs()) reserveRegisterTuples(Reserved, Reg); // FIXME: Stop using reserved registers for this. 
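getAlignedHighSGPRForRC generalizes the old reservedPrivateSegmentBufferReg computation: take the maximum SGPR count for the function, round it down to the requested alignment, and step back one aligned tuple. A worked example of just the arithmetic (standalone C++, numbers illustrative only):

  #include <cstdio>

  // Mirrors BaseIdx = alignDown(MaxNumSGPRs, Align) - Align from the patch.
  static unsigned alignDown(unsigned Value, unsigned Align) {
    return Value - (Value % Align);
  }

  int main() {
    unsigned MaxNumSGPRs = 102; // an illustrative per-function SGPR limit
    unsigned Align = 4;         // an SGPR_128 tuple needs 4-register alignment
    unsigned BaseIdx = alignDown(MaxNumSGPRs, Align) - Align;
    // With these numbers the reserved tuple starts at s96, i.e. s[96:99].
    std::printf("BaseIdx = %u -> s[%u:%u]\n", BaseIdx, BaseIdx,
                BaseIdx + Align - 1);
    return 0;
  }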
@@ -722,9 +734,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs()) reserveRegisterTuples(Reserved, Reg); - for (auto SSpill : MFI->getSGPRSpillVGPRs()) - reserveRegisterTuples(Reserved, SSpill.VGPR); - return Reserved; } @@ -942,8 +951,6 @@ const TargetRegisterClass * SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) return getEquivalentVGPRClass(RC); - if (RC == &AMDGPU::SCC_CLASSRegClass) - return getWaveMaskRegClass(); return RC; } @@ -1080,6 +1087,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { case AMDGPU::SI_SPILL_AV32_SAVE: case AMDGPU::SI_SPILL_AV32_CFI_SAVE: case AMDGPU::SI_SPILL_AV32_RESTORE: + case AMDGPU::SI_SPILL_WWM_V32_SAVE: + case AMDGPU::SI_SPILL_WWM_V32_RESTORE: return 1; default: llvm_unreachable("Invalid spill opcode"); } @@ -1201,10 +1210,11 @@ static int getOffenMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder -spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, int Index, unsigned Lane, - unsigned ValueReg, bool IsKill, bool NeedsCFI) { +static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + int Index, unsigned Lane, + unsigned ValueReg, bool IsKill, bool NeedsCFI) { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -1228,8 +1238,9 @@ spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, // It could result in AGPR spills restored to VGPRs or the other way around, // making the src and dst with identical regclasses at this point. It just // needs a copy in such cases. - auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst) - .addReg(Src, getKillRegState(IsKill)); + MachineInstrBuilder CopyMIB = + MachineInstrBuilder(*MBB.getParent(), TII->buildCopy(MBB, MI, DL, Dst)); + CopyMIB.addReg(Src, getKillRegState(IsKill)); CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); if (NeedsCFI) TFL->buildCFIForRegToRegSpill(MBB, MI, DL, Src, Dst); @@ -1477,6 +1488,7 @@ void SIRegisterInfo::buildSpillLoadStore( } AdditionalCFIOffset = Offset; + // We currently only support spilling VGPRs to EltSize boundaries, meaning // we can simplify the adjustment of Offset here to just scale with // WavefrontSize. @@ -1549,6 +1561,7 @@ void SIRegisterInfo::buildSpillLoadStore( unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); const bool IsLastSubReg = i + 1 == e; + const bool IsFirstSubReg = i == 0; if (IsLastSubReg) { SOffsetRegState |= getKillRegState(Scavenged); // The last implicit use carries the "Kill" flag. @@ -1557,7 +1570,7 @@ void SIRegisterInfo::buildSpillLoadStore( // Make sure the whole register is defined if there are undef components by // adding an implicit def of the super-reg on the first instruction. 
- bool NeedSuperRegDef = e > 1 && IsStore && i == 0; + bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg; bool NeedSuperRegImpOperand = e > 1; // Remaining element size to spill into memory after some parts of it @@ -1585,11 +1598,13 @@ void SIRegisterInfo::buildSpillLoadStore( MIB.addReg(ValueReg, RegState::ImplicitDefine); NeedSuperRegDef = false; } - if (IsSubReg || NeedSuperRegImpOperand) { + if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) { NeedSuperRegImpOperand = true; unsigned State = SrcDstRegState; - if (Lane != LaneE) + if (!IsLastSubReg || (Lane != LaneE)) State &= ~RegState::Kill; + if (!IsFirstSubReg || (Lane != LaneS)) + State &= ~RegState::Define; MIB.addReg(ValueReg, RegState::Implicit | State); } RemEltSize -= 4; @@ -1656,7 +1671,6 @@ void SIRegisterInfo::buildSpillLoadStore( if (SOffset == AMDGPU::NoRegister) { if (!IsFlat) { if (UseVGPROffset && ScratchOffsetReg) { - assert(!FuncInfo->isEntryFunction()); MIB.addReg(ScratchOffsetReg); } else { assert(FuncInfo->isEntryFunction()); @@ -1734,15 +1748,15 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, } } -bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, - int Index, - RegScavenger *RS, - SlotIndexes *Indexes, - LiveIntervals *LIS, - bool OnlyToVGPR, bool NeedsCFI) const { +bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, + RegScavenger *RS, SlotIndexes *Indexes, + LiveIntervals *LIS, bool OnlyToVGPR, + bool SpillToPhysVGPRLane, bool NeedsCFI) const { SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); - ArrayRef VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) + : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; @@ -1886,10 +1900,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, - LiveIntervals *LIS, bool OnlyToVGPR) const { + LiveIntervals *LIS, bool OnlyToVGPR, + bool SpillToPhysVGPRLane) const { SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); - ArrayRef VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index); + ArrayRef VGPRSpills = + SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) + : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; @@ -2035,7 +2052,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, /// handled. 
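The buildSpillLoadStore change above tightens when the implicit super-register operand is attached and which flags it carries: it is now added only on the first and last sub-dword instruction, the kill flag can only survive on the last one, and the define flag only on the first. A simplified standalone model of that flag placement (plain C++, not part of the patch; it ignores the inner per-lane loop and the IsKill input):

  #include <cstdio>

  int main() {
    const unsigned NumSubRegs = 4; // e.g. a 128-bit value split into dwords
    const bool IsStore = true;     // store: the value is a use, a load defines it
    for (unsigned I = 0; I < NumSubRegs; ++I) {
      const bool IsFirstSubReg = I == 0;
      const bool IsLastSubReg = I + 1 == NumSubRegs;
      // Only the first and last pieces carry the implicit super-reg operand.
      if (!IsFirstSubReg && !IsLastSubReg) {
        std::printf("subreg %u: no implicit super-reg operand\n", I);
        continue;
      }
      // Kill may only appear on the last piece, Define only on the first, so
      // the verifier sees a single def and a single final use of the value.
      const bool Kill = IsStore && IsLastSubReg;
      const bool Define = !IsStore && IsFirstSubReg;
      std::printf("subreg %u: implicit%s%s\n", I, Kill ? " kill" : "",
                  Define ? " def" : "");
    }
    return 0;
  }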
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, - SlotIndexes *Indexes, LiveIntervals *LIS) const { + SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const { bool NeedsCFI = false; switch (MI->getOpcode()) { case AMDGPU::SI_SPILL_S1024_CFI_SAVE: @@ -2060,7 +2077,8 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: - return spillSGPR(MI, FI, RS, Indexes, LIS, true, NeedsCFI); + return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane, + NeedsCFI); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: @@ -2071,7 +2089,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_S32_RESTORE: - return restoreSGPR(MI, FI, RS, Indexes, LIS, true); + return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); default: llvm_unreachable("not an SGPR spill instruction"); } @@ -2204,7 +2222,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_AV128_SAVE: case AMDGPU::SI_SPILL_AV96_SAVE: case AMDGPU::SI_SPILL_AV64_SAVE: - case AMDGPU::SI_SPILL_AV32_SAVE: { + case AMDGPU::SI_SPILL_AV32_SAVE: + case AMDGPU::SI_SPILL_WWM_V32_SAVE: { const MachineOperand *VData = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == @@ -2213,11 +2232,20 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; auto MBB = MI->getParent(); + bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); + if (IsWWMRegSpill) + TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), + RS->isRegUsed(AMDGPU::SCC)); + buildSpillLoadStore( *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS, nullptr, NeedsCFI); + MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); + if (IsWWMRegSpill) + TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); + MI->eraseFromParent(); break; } @@ -2250,7 +2278,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_AV224_RESTORE: case AMDGPU::SI_SPILL_AV256_RESTORE: case AMDGPU::SI_SPILL_AV512_RESTORE: - case AMDGPU::SI_SPILL_AV1024_RESTORE: { + case AMDGPU::SI_SPILL_AV1024_RESTORE: + case AMDGPU::SI_SPILL_WWM_V32_RESTORE: { const MachineOperand *VData = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == @@ -2259,10 +2288,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; auto MBB = MI->getParent(); + bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); + if (IsWWMRegSpill) + TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), + RS->isRegUsed(AMDGPU::SCC)); buildSpillLoadStore( *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); + if (IsWWMRegSpill) + TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); + MI->eraseFromParent(); break; } @@ -2497,8 +2533,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addReg(ScaledReg, RegState::Kill) .addImm(Offset); if (!IsSALU) - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) - .addReg(ScaledReg, RegState::Kill); + TII->buildCopy(*MBB, MI, DL, ResultReg, ScaledReg, + RegState::Kill); else ResultReg = ScaledReg; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 273fd4429d799..97a2d26b3a778 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -74,6 +74,12 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { bool isCFISavedRegsSpillEnabled() const; + /// Return the largest available SGPR aligned to \p Align for the register + /// class \p RC. + MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, + const unsigned Align, + const TargetRegisterClass *RC) const; + /// Return the end register initially reserved for the scratch buffer in case /// spilling is needed. MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; @@ -88,6 +94,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + bool addAllocPriorityToGlobalRanges() const override { + return true; + } + // Stack access is very expensive. CSRs are also the high registers, and we // want to minimize the number of used registers. unsigned getCSRFirstUseCost() const override { @@ -142,16 +152,15 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a /// free VGPR lane to spill. 
- bool spillSGPR(MachineBasicBlock::iterator MI, - int FI, RegScavenger *RS, - SlotIndexes *Indexes = nullptr, - LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false, + bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, + SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, + bool OnlyToVGPR = false, bool SpillToPhysVGPRLane = false, bool NeedsCFI = false) const; bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false) const; + bool OnlyToVGPR = false, + bool SpillToPhysVGPRLane = false) const; bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, @@ -161,10 +170,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { unsigned FIOperandNum, RegScavenger *RS) const override; - bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, - int FI, RegScavenger *RS, - SlotIndexes *Indexes = nullptr, - LiveIntervals *LIS = nullptr) const; + bool eliminateSGPRToVGPRSpillFrameIndex( + MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, + SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, + bool SpillToPhysVGPRLane = false) const; StringRef getRegAsmName(MCRegister Reg) const override; diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 53441b5a4cedf..dd59e0ed8ed6d 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -200,6 +200,7 @@ def : HWVALUWriteRes; } // End RetireOOO = 1 def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; } // End SchedModel = SIFullSpeedModel @@ -218,6 +219,7 @@ def : HWVALUWriteRes; } // End RetireOOO = 1 def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_..._4X4X")>; def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_..._16X16X")>; @@ -240,6 +242,7 @@ def : HWVALUWriteRes; } // End RetireOOO = 1 def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>; def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X")>; @@ -262,6 +265,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>; @@ -310,6 +314,7 @@ def : HWWriteRes; } // End RetireOOO = 1 def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; } // End SchedModel = GFX10SpeedModel @@ -336,5 +341,6 @@ def : HWWriteRes; def : HWWriteRes; def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[WriteCopy], (instrs PRED_COPY)>; } // End SchedModel = GFX11SpeedModel diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index c54d23ee40f3f..9658821ba211f 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -620,8 +620,7 @@ void SIShrinkInstructions::dropInstructionKeepingImpDefs( // This is really just a generic peephole that 
is not a canonical shrinking, // although requirements match the pass placement and it reduces code size too. MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const { - assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 || - MovT.getOpcode() == AMDGPU::COPY); + assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 || MovT.isCopy()); Register T = MovT.getOperand(0).getReg(); unsigned Tsub = MovT.getOperand(0).getSubReg(); @@ -650,10 +649,8 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const { MachineInstr *MovY = &*Iter; KilledT = MovY->killsRegister(T, TRI); - if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && - MovY->getOpcode() != AMDGPU::COPY) || - !MovY->getOperand(1).isReg() || - MovY->getOperand(1).getReg() != T || + if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && !MovY->isCopy()) || + !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T || MovY->getOperand(1).getSubReg() != Tsub || MovY->hasRegisterImplicitUseOperand(AMDGPU::M0)) continue; @@ -680,9 +677,7 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const { } continue; } - if (MovX || - (I->getOpcode() != AMDGPU::V_MOV_B32_e32 && - I->getOpcode() != AMDGPU::COPY) || + if (MovX || (I->getOpcode() != AMDGPU::V_MOV_B32_e32 && !I->isCopy()) || I->getOperand(0).getReg() != X || I->getOperand(0).getSubReg() != Xsub) { MovX = nullptr; @@ -801,8 +796,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { } } - if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || - MI.getOpcode() == AMDGPU::COPY)) { + if (ST->hasSwap() && + (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || MI.isCopy())) { if (auto *NextMI = matchSwap(MI)) { Next = NextMI->getIterator(); continue; diff --git a/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp b/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp new file mode 100644 index 0000000000000..b8d013c0b944c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SISimplifyPredicatedCopies.cpp @@ -0,0 +1,169 @@ +//===- SISimplifyPredicatedCopies.cpp - Simplify Copies after regalloc ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Simplify the predicated COPY (PRED_COPY) instructions for various register +/// classes. AMDGPU vector register copies carry a predicated dependency on the +/// EXEC register, so EXEC should be added to them as an implicit operand post-RA. +/// Scalar register copies carry no such dependency, and hence the regular COPY +/// opcode can be used. AMDGPU uses the PRED_COPY opcode by default from +/// instruction selection onwards, and this pass simplifies the copy opcode and +/// the implicit-operand field as described above. This pass also implements the +/// EXEC mask manipulation around whole-wave vector register copies by setting +/// all bits of EXEC before the copy and restoring it immediately afterwards.
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-simplify-predicated-copies" + +namespace { + +class SISimplifyPredicatedCopies : public MachineFunctionPass { +public: + static char ID; + + SISimplifyPredicatedCopies() : MachineFunctionPass(ID) { + initializeSISimplifyPredicatedCopiesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "SI Simplify Predicated Copies"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool isWWMCopy(const MachineInstr &MI); + bool isSCCLiveAtMI(const MachineInstr &MI); + + LiveIntervals *LIS; + SlotIndexes *Indexes; + const SIRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + SIMachineFunctionInfo *MFI; +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(SISimplifyPredicatedCopies, DEBUG_TYPE, + "SI Simplify Predicated Copies", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(SISimplifyPredicatedCopies, DEBUG_TYPE, + "SI Simplify Predicated Copies", false, false) + +char SISimplifyPredicatedCopies::ID = 0; + +char &llvm::SISimplifyPredicatedCopiesID = SISimplifyPredicatedCopies::ID; + +// Returns true if \p MI is a whole-wave copy instruction. Iterate +// recursively skipping the intermediate copies if it maps to any +// whole-wave operation. +bool SISimplifyPredicatedCopies::isWWMCopy(const MachineInstr &MI) { + Register SrcReg = MI.getOperand(1).getReg(); + + if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG)) + return true; + + if (SrcReg.isPhysical()) + return false; + + // Look recursively skipping intermediate copies. + const MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg); + if (!DefMI || !DefMI->isCopy()) + return false; + + return isWWMCopy(*DefMI); +} + +bool SISimplifyPredicatedCopies::isSCCLiveAtMI(const MachineInstr &MI) { + // We can't determine the liveness info if LIS isn't available. Early return + // in that case and always assume SCC is live. + if (!LIS) + return true; + + LiveRange &LR = + LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); + SlotIndex Idx = LIS->getInstructionIndex(MI); + return LR.liveAt(Idx); +} + +bool SISimplifyPredicatedCopies::runOnMachineFunction(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + MFI = MF.getInfo(); + LIS = getAnalysisIfAvailable(); + Indexes = getAnalysisIfAvailable(); + TRI = ST.getRegisterInfo(); + MRI = &MF.getRegInfo(); + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AMDGPU::COPY: + case AMDGPU::PRED_COPY: + if (!TII->isVGPRCopy(MI) && + TRI->isSGPRReg(*MRI, MI.getOperand(1).getReg())) { + // For PRED_COPY with SGPR regclass, change the opcode back to the + // regular COPY. 
+ if (Opcode == AMDGPU::PRED_COPY) { + LLVM_DEBUG(dbgs() << MI << " to use COPY opcode"); + MI.setDesc(TII->get(AMDGPU::COPY)); + Changed = true; + } + } else { + if (TII->isVGPRCopy(MI) && + !TRI->isSGPRReg(*MRI, MI.getOperand(1).getReg()) && + MI.getOperand(0).getReg().isVirtual() && isWWMCopy(MI)) { + // For WWM vector copies, manipulate the exec mask around the copy + // instruction. + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator InsertPt = MI.getIterator(); + Register RegForExecCopy = MFI->getSGPRForEXECCopy(); + TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, + isSCCLiveAtMI(MI), Indexes); + TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); + LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); + } + + // For vector registers, add implicit exec use. + if (!MI.readsRegister(AMDGPU::EXEC, TRI)) { + MI.addOperand(MF, + MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + LLVM_DEBUG(dbgs() << "Add exec use to " << MI); + Changed = true; + } + } + break; + default: + break; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 0a80779f8a5cd..c10cdf5a37eed 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -747,11 +747,9 @@ SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB, Register SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MachineInstr *Save = - BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg) - .addReg(AMDGPU::SCC); + TII->buildCopy(MBB, Before, DebugLoc(), SaveReg, AMDGPU::SCC); MachineInstr *Restore = - BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::SCC) - .addReg(SaveReg); + TII->buildCopy(MBB, Before, DebugLoc(), AMDGPU::SCC, SaveReg); LIS->InsertMachineInstrInMaps(*Save); LIS->InsertMachineInstrInMaps(*Restore); @@ -1229,8 +1227,7 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB, MachineInstr *MI; if (SavedWQM) { - MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), Exec) - .addReg(SavedWQM); + MI = TII->buildCopy(MBB, Before, DebugLoc(), Exec, SavedWQM); } else { MI = BuildMI(MBB, Before, DebugLoc(), TII->get(WQMOpc), Exec).addReg(Exec); } @@ -1315,7 +1312,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) { auto II = MBB.getFirstNonPHI(), IE = MBB.end(); if (IsEntry) { // Skip the instruction that saves LiveMask - if (II != IE && II->getOpcode() == AMDGPU::COPY) + if (II != IE && II->isCopy()) ++II; } @@ -1484,8 +1481,7 @@ void SIWholeQuadMode::lowerLiveMaskQueries() { Register Dest = MI->getOperand(0).getReg(); MachineInstr *Copy = - BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest) - .addReg(LiveMaskReg); + TII->buildCopy(*MI->getParent(), MI, DL, Dest, LiveMaskReg); LIS->ReplaceMachineInstrInMaps(*MI, *Copy); MI->eraseFromParent(); @@ -1523,7 +1519,7 @@ void SIWholeQuadMode::lowerCopyInstrs() { MI->removeOperand(Index); Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC); } - MI->setDesc(TII->get(AMDGPU::COPY)); + MI->setDesc(TII->get(TII->getCopyOpcode())); LLVM_DEBUG(dbgs() << " -> " << *MI); } } @@ -1541,7 +1537,7 @@ void SIWholeQuadMode::lowerCopyInstrs() { assert(MI->getNumExplicitOperands() == 2); } - MI->setDesc(TII->get(AMDGPU::COPY)); + MI->setDesc(TII->get(TII->getCopyOpcode())); } } @@ -1624,8 +1620,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { if (NeedsLiveMask || (GlobalFlags & StateWQM)) { LiveMaskReg = 
MRI->createVirtualRegister(TRI->getBoolRC()); MachineInstr *MI = - BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg) - .addReg(Exec); + TII->buildCopy(Entry, EntryMI, DebugLoc(), LiveMaskReg, Exec); LIS->InsertMachineInstrInMaps(*MI); } diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 5cd6effc37c5e..3cd1f49f2b1e6 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -524,7 +524,8 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32", class SelectPat : PatFrag < (ops node:$src1, node:$src2), (select SCC, $src1, $src2), - [{ return !N->isDivergent(); }] + [{ return Subtarget->hasScalarCompareEq64() && + N->getOperand(0)->hasOneUse() && !N->isDivergent(); }] >; let Uses = [SCC] in { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index dda515595e4d9..0d4dfd9ced66f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -31,7 +31,7 @@ static llvm::cl::opt AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::desc("AMDHSA Code Object Version"), - llvm::cl::init(4)); + llvm::cl::init(5)); namespace { @@ -149,34 +149,40 @@ unsigned getAmdhsaCodeObjectVersion() { return AmdhsaCodeObjectVersion; } -unsigned getMultigridSyncArgImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { - case 2: - case 3: - case 4: +unsigned getCodeObjectVersion(const Module &M) { + if (auto Ver = mdconst::extract_or_null( + M.getModuleFlag("amdgpu_code_object_version"))) { + return (unsigned)Ver->getZExtValue() / 100; + } + + // Default code object version. + return AMDHSA_COV5; +} + +unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { + switch (CodeObjectVersion) { + case AMDHSA_COV2: + case AMDHSA_COV3: + case AMDHSA_COV4: return 48; - case 5: - return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; + case AMDHSA_COV5: default: - llvm_unreachable("Unexpected code object version"); - return 0; + return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; } } // FIXME: All such magic numbers about the ABI should be in a // central TD file. 
-unsigned getHostcallImplicitArgPosition() { - switch (AmdhsaCodeObjectVersion) { - case 2: - case 3: - case 4: +unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { + switch (CodeObjectVersion) { + case AMDHSA_COV2: + case AMDHSA_COV3: + case AMDHSA_COV4: return 24; - case 5: - return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; + case AMDHSA_COV5: default: - llvm_unreachable("Unexpected code object version"); - return 0; + return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; } } @@ -565,7 +571,7 @@ namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) : STI(STI), XnackSetting(TargetIDSetting::Any), - SramEccSetting(TargetIDSetting::Any) { + SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) { if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) XnackSetting = TargetIDSetting::Unsupported; if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) @@ -676,9 +682,9 @@ std::string AMDGPUTargetID::toString() const { .str(); std::string Features; - if (Optional HsaAbiVersion = getHsaAbiVersion(&STI)) { - switch (*HsaAbiVersion) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: + if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { + switch (CodeObjectVersion) { + case AMDGPU::AMDHSA_COV2: // Code object V2 only supported specific processors and had fixed // settings for the XNACK. if (Processor == "gfx600") { @@ -726,7 +732,7 @@ std::string AMDGPUTargetID::toString() const { Twine(Processor)); } break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V3: + case AMDGPU::AMDHSA_COV3: // xnack. if (isXnackOnOrAny()) Features += "+xnack"; @@ -735,8 +741,8 @@ std::string AMDGPUTargetID::toString() const { if (isSramEccOnOrAny()) Features += "+sram-ecc"; break; - case ELF::ELFABIVERSION_AMDGPU_HSA_V4: - case ELF::ELFABIVERSION_AMDGPU_HSA_V5: + case AMDGPU::AMDHSA_COV4: + case AMDGPU::AMDHSA_COV5: // sramecc. if (getSramEccSetting() == TargetIDSetting::Off) Features += ":sramecc-"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a8642a0d1da85..340abb06e7e47 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -12,6 +12,7 @@ #include "SIDefines.h" #include "llvm/ADT/Optional.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Alignment.h" #include #include @@ -41,6 +42,13 @@ namespace AMDGPU { struct IsaVersion; +enum { + AMDHSA_COV2 = 2, + AMDHSA_COV3 = 3, + AMDHSA_COV4 = 4, + AMDHSA_COV5 = 5 +}; + /// \returns HSA OS ABI Version identification. Optional getHsaAbiVersion(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 2, @@ -60,14 +68,17 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI); bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr -unsigned getMultigridSyncArgImplicitArgPosition(); +unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); /// \returns The offset of the hostcall pointer argument from implicitarg_ptr -unsigned getHostcallImplicitArgPosition(); +unsigned getHostcallImplicitArgPosition(unsigned COV); /// \returns Code object version. unsigned getAmdhsaCodeObjectVersion(); +/// \returns Code object version. 
+unsigned getCodeObjectVersion(const Module &M); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; @@ -112,6 +123,7 @@ class AMDGPUTargetID { const MCSubtargetInfo &STI; TargetIDSetting XnackSetting; TargetIDSetting SramEccSetting; + unsigned CodeObjectVersion; public: explicit AMDGPUTargetID(const MCSubtargetInfo &STI); @@ -141,6 +153,10 @@ class AMDGPUTargetID { return XnackSetting; } + void setCodeObjectVersion(unsigned COV) { + CodeObjectVersion = COV; + } + /// Sets xnack setting to \p NewXnackSetting. void setXnackSetting(TargetIDSetting NewXnackSetting) { XnackSetting = NewXnackSetting; @@ -688,6 +704,12 @@ getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); LLVM_READONLY bool isTrue16Inst(unsigned Opc); +LLVM_READONLY +bool isVOPD(unsigned Opc); + +LLVM_READONLY +bool isTrue16Inst(unsigned Opc); + LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index bb2b918837c6e..9b10012042f09 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -475,7 +475,8 @@ def shl_0_to_4 : PatFrag< let GISelPredicateCode = [{ int64_t Imm = 0; if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) && - !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm)))) + !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm))) && + !mi_match(MI.getOperand(2).getReg(), MRI, m_Pred_Copy(m_ICst(Imm)))) return false; return (uint64_t)Imm <= 4; }]; diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp index 6e8190ee7209d..fe78a98837cf9 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp +++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp @@ -290,12 +290,10 @@ void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - Register SrcReg, bool IsKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void ARCInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -323,7 +321,8 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h index ebc02a93b1244..c55c9535ec296 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.h +++ b/llvm/lib/Target/ARC/ARCInstrInfo.h @@ -71,12 +71,14 @@ class ARCInstrInfo : public ARCGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const 
override; + const TargetRegisterInfo *TRI, + Register VReg) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h index 454206037498e..cace92a2b8fb6 100644 --- a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h +++ b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h @@ -27,11 +27,7 @@ class ARCFunctionInfo : public MachineFunctionInfo { unsigned ReturnStackOffset; public: - ARCFunctionInfo() - : ReturnStackOffsetSet(false), VarArgsFrameIndex(0), - ReturnStackOffset(-1U), MaxCallStackReq(0) {} - - explicit ARCFunctionInfo(MachineFunction &MF) + explicit ARCFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) : ReturnStackOffsetSet(false), VarArgsFrameIndex(0), ReturnStackOffset(-1U), MaxCallStackReq(0) {} ~ARCFunctionInfo() {} diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp index 21757927d8734..4cf1198bcfe5e 100644 --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -78,6 +78,12 @@ void ARCPassConfig::addPreRegAlloc() { addPass(createARCOptAddrMode()); } +MachineFunctionInfo *ARCTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return ARCFunctionInfo::create(Allocator, F, STI); +} + // Force static initialization. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTarget() { RegisterTargetMachine X(getTheARCTarget()); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index a5a5b73561547..659740e81d644 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1111,11 +1111,12 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, return MIB.addReg(Reg, State, SubIdx); } -void ARMBaseInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); Align Alignment = MFI.getObjectAlign(FI); @@ -1367,11 +1368,12 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, return false; } -void ARMBaseInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 453e3fa1b99b1..af3be1b8dc10f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -207,16 +207,17 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - 
Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 9596e88deb18b..aa9d8b54d9636 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -13,12 +13,11 @@ using namespace llvm; void ARMFunctionInfo::anchor() {} -static bool GetBranchTargetEnforcement(MachineFunction &MF) { - const auto &Subtarget = MF.getSubtarget(); - if (!Subtarget.isMClass() || !Subtarget.hasV7Ops()) +static bool GetBranchTargetEnforcement(const Function &F, + const ARMSubtarget *Subtarget) { + if (!Subtarget->isMClass() || !Subtarget->hasV7Ops()) return false; - const Function &F = MF.getFunction(); if (!F.hasFnAttribute("branch-target-enforcement")) { if (const auto *BTE = mdconst::extract_or_null( F.getParent()->getModuleFlag("branch-target-enforcement"))) @@ -61,17 +60,14 @@ static std::pair GetSignReturnAddress(const Function &F) { return {true, false}; } -ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) - : isThumb(MF.getSubtarget().isThumb()), - hasThumb2(MF.getSubtarget().hasThumb2()), - IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")), - IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")), - BranchTargetEnforcement(GetBranchTargetEnforcement(MF)) { - - const auto &Subtarget = MF.getSubtarget(); - if (Subtarget.isMClass() && Subtarget.hasV7Ops()) - std::tie(SignReturnAddress, SignReturnAddressAll) = - GetSignReturnAddress(MF.getFunction()); +ARMFunctionInfo::ARMFunctionInfo(const Function &F, + const ARMSubtarget *Subtarget) + : isThumb(Subtarget->isThumb()), hasThumb2(Subtarget->hasThumb2()), + IsCmseNSEntry(F.hasFnAttribute("cmse_nonsecure_entry")), + IsCmseNSCall(F.hasFnAttribute("cmse_nonsecure_call")), + BranchTargetEnforcement(GetBranchTargetEnforcement(F, Subtarget)) { + if (Subtarget->isMClass() && Subtarget->hasV7Ops()) + std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F); } MachineFunctionInfo * diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index e906fea1a8109..aba1afec3d483 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -22,6 +22,8 @@ namespace llvm { +class ARMSubtarget; + /// ARMFunctionInfo - This class is derived from MachineFunctionInfo and /// contains private ARM-specific information for each MachineFunction. 
class ARMFunctionInfo : public MachineFunctionInfo { @@ -157,7 +159,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { public: ARMFunctionInfo() = default; - explicit ARMFunctionInfo(MachineFunction &MF); + explicit ARMFunctionInfo(const Function &F, const ARMSubtarget *STI); MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td index 0a440555a0c44..2e572f165c69c 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -152,6 +152,7 @@ def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", // ----------------------------------------------------------------------------- def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>; +def : InstRW<[A57Write_1cyc_1I], (instrs PRED_COPY)>; // --- 3.2 Branch Instructions --- // B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ diff --git a/llvm/lib/Target/ARM/ARMScheduleM4.td b/llvm/lib/Target/ARM/ARMScheduleM4.td index bfa5fc0d71319..6e358ed5c934b 100644 --- a/llvm/lib/Target/ARM/ARMScheduleM4.td +++ b/llvm/lib/Target/ARM/ARMScheduleM4.td @@ -82,6 +82,7 @@ def : M4UnitL1; def : M4UnitL1; def : M4UnitL1I<(instregex "(t|t2)MOV")>; def : M4UnitL1I<(instrs COPY)>; +def : M4UnitL1I<(instrs PRED_COPY)>; def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>; def : M4UnitL1I<(instregex "t2CLREX")>; def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]", diff --git a/llvm/lib/Target/ARM/ARMScheduleR52.td b/llvm/lib/Target/ARM/ARMScheduleR52.td index 466acec6f76ae..5916d783d3bc9 100644 --- a/llvm/lib/Target/ARM/ARMScheduleR52.td +++ b/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -210,6 +210,7 @@ def : SchedAlias; // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types. 
// def : InstRW<[WriteALU], (instrs COPY)>; +def : InstRW<[WriteALU], (instrs PRED_COPY)>; def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16", diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index d95c21d6504b7..7da8e441282f9 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,6 +11,7 @@ #include "ARMTargetMachine.h" #include "ARM.h" +#include "ARMMachineFunctionInfo.h" #include "ARMMacroFusion.h" #include "ARMSubtarget.h" #include "ARMTargetObjectFile.h" @@ -264,6 +265,13 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; +MachineFunctionInfo *ARMBaseTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return ARMFunctionInfo::create( + Allocator, F, static_cast(STI)); +} + const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index 8d33a038deeb0..cd5956588ccfc 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -73,6 +73,10 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { bool targetSchedulesPostRAScheduling() const override { return true; }; + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. 
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 1a36c2ca9152e..f077435a2cc76 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -75,11 +75,12 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } -void Thumb1InstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { assert((RC == &ARM::tGPRRegClass || (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && "Unknown regclass!"); @@ -103,11 +104,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void Thumb1InstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { assert( (RC->hasSuperClassEq(&ARM::tGPRRegClass) || (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 0b8f3ae7c7761..984bec4e64490 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -41,16 +41,17 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; bool canCopyGluedNodeDuringSchedule(SDNode *N) const override; private: diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 60dbc7b920132..b5ebd08f3370c 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -160,11 +160,12 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .add(predOps(ARMCC::AL)); } -void Thumb2InstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = 
I->getDebugLoc(); @@ -200,14 +201,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI, + Register()); } -void Thumb2InstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -244,7 +247,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI, + Register()); } void Thumb2InstrInfo::expandLoadStackGuard( diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index a83ff5e510048..4bb412f09dcbe 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -42,16 +42,17 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As /// such, whenever a client has an instance of instruction info, it should diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 059e627f794af..a1bc865ffb8a1 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -124,12 +124,10 @@ unsigned AVRInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } -void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void AVRInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { MachineFunction &MF = *MBB.getParent(); AVRMachineFunctionInfo *AFI = MF.getInfo(); @@ -162,7 +160,8 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h index 6d0596642fa15..f84837a92e1e8 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -78,11 +78,13 @@ class AVRInstrInfo : public AVRGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h index da4c48559d9e2..45c367a4dcd49 100644 --- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h +++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h @@ -45,20 +45,15 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo { int VarArgsFrameIndex; public: - AVRMachineFunctionInfo() - : HasSpills(false), HasAllocas(false), HasStackArgs(false), - IsInterruptHandler(false), IsSignalHandler(false), - CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {} - - explicit AVRMachineFunctionInfo(MachineFunction &MF) + AVRMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) : HasSpills(false), HasAllocas(false), HasStackArgs(false), CalleeSavedFrameSize(0), VarArgsFrameIndex(0) { - unsigned CallConv = MF.getFunction().getCallingConv(); + CallingConv::ID CallConv = F.getCallingConv(); - this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR || - MF.getFunction().hasFnAttribute("interrupt"); - this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL || - MF.getFunction().hasFnAttribute("signal"); + this->IsInterruptHandler = + CallConv == CallingConv::AVR_INTR || F.hasFnAttribute("interrupt"); + this->IsSignalHandler = + CallConv == CallingConv::AVR_SIGNAL || F.hasFnAttribute("signal"); } MachineFunctionInfo * diff 
--git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp index b9d77e0d1a514..bba89269c6624 100644 --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/TargetRegistry.h" #include "AVR.h" +#include "AVRMachineFunctionInfo.h" #include "AVRTargetObjectFile.h" #include "MCTargetDesc/AVRMCTargetDesc.h" #include "TargetInfo/AVRTargetInfo.h" @@ -103,6 +104,13 @@ const AVRSubtarget *AVRTargetMachine::getSubtargetImpl(const Function &) const { return &SubTarget; } +MachineFunctionInfo *AVRTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return AVRMachineFunctionInfo::create(Allocator, F, + STI); +} + //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.h b/llvm/lib/Target/AVR/AVRTargetMachine.h index 54669eda060c6..885b1db28deb3 100644 --- a/llvm/lib/Target/AVR/AVRTargetMachine.h +++ b/llvm/lib/Target/AVR/AVRTargetMachine.h @@ -41,6 +41,10 @@ class AVRTargetMachine : public LLVMTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + private: std::unique_ptr TLOF; AVRSubtarget SubTarget; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp index e61e32b62d838..2209f1f1462b4 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp +++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp @@ -125,7 +125,8 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -148,7 +149,8 @@ void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h index e797363ead8f2..354aca1bd2f93 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.h +++ b/llvm/lib/Target/BPF/BPFInstrInfo.h @@ -39,12 +39,14 @@ class BPFInstrInfo : public BPFGenInstrInfo { MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp index 9907f39b3f902..cedcbff1db24f 100644 --- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp +++ 
b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp @@ -476,7 +476,8 @@ bool CSKYFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI, + Register()); } return true; @@ -497,7 +498,8 @@ bool CSKYFrameLowering::restoreCalleeSavedRegisters( for (auto &CS : reverse(CSI)) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, + Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index 0bf739452fd2b..e5581bcdc3975 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -392,7 +392,8 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -435,7 +436,8 @@ void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h index a979b0bf4b0db..dbb69a7a87980 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h @@ -44,12 +44,14 @@ class CSKYInstrInfo : public CSKYGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h index 57e0d62481ad7..51eda696709d0 100644 --- a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h +++ b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h @@ -31,7 +31,7 @@ class CSKYMachineFunctionInfo : public MachineFunctionInfo { unsigned PICLabelUId = 0; public: - CSKYMachineFunctionInfo(MachineFunction &) {} + CSKYMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index e744a52162e24..0128c1121fecc 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1422,7 +1422,7 @@ bool 
HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); int FI = I.getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); + HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register()); if (IsKill) MBB.addLiveIn(Reg); } @@ -1487,7 +1487,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, Register Reg = I.getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); int FI = I.getFrameIdx(); - HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); + HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register()); } return true; @@ -1864,7 +1864,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, .addReg(TmpR0, RegState::Kill); auto *HRI = B.getParent()->getSubtarget().getRegisterInfo(); - HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI); + HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register()); expandStoreVec(B, std::prev(It), MRI, HII, NewRegs); NewRegs.push_back(TmpR0); @@ -1895,7 +1895,7 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, .addImm(0x01010101); MachineFunction &MF = *B.getParent(); auto *HRI = MF.getSubtarget().getRegisterInfo(); - HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI); + HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register()); expandLoadVec(B, std::prev(It), MRI, HII, NewRegs); BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 74cd4ce3e309f..783fd68b8b3b9 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -953,8 +953,11 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -997,10 +1000,12 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } } -void HexagonInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 703a894132bb5..1848c35dedbff 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -181,19 +181,20 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// machine basic block before the specified machine instruction. If isKill /// is true, the register operand is the last use and must be marked kill. 
void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; /// Load the specified register of the given register class from the specified /// stack frame index. The load instruction is to be added to the given /// machine basic block before the specified machine instruction. void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; /// This function is called for all pseudo instructions /// that remain after register allocation. Many pseudo instructions are diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index a02de24b176ae..fa3c783ecbfb5 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -41,7 +41,9 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { public: HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) {} + HexagonMachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} + MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 4e04939e6690d..2696389769a01 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -14,6 +14,7 @@ #include "Hexagon.h" #include "HexagonISelLowering.h" #include "HexagonLoopIdiomRecognition.h" +#include "HexagonMachineFunctionInfo.h" #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" #include "HexagonTargetTransformInfo.h" @@ -302,6 +303,13 @@ HexagonTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(HexagonTTIImpl(this, F)); } +MachineFunctionInfo *HexagonTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return HexagonMachineFunctionInfo::create( + Allocator, F, STI); +} + HexagonTargetMachine::~HexagonTargetMachine() = default; namespace { diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index 947df7574ab34..6145a757be75f 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -44,6 +44,10 @@ class HexagonTargetMachine : public LLVMTargetMachine { HexagonTargetObjectFile *getObjFileLowering() const override { return static_cast(TLOF.get()); } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index bef2458fd1262..ccdfb32271508 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ 
b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -50,7 +50,7 @@ void LanaiInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator Position, Register SourceRegister, bool IsKill, int FrameIndex, const TargetRegisterClass *RegisterClass, - const TargetRegisterInfo * /*RegisterInfo*/) const { + const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/) const { DebugLoc DL; if (Position != MBB.end()) { DL = Position->getDebugLoc(); @@ -70,7 +70,7 @@ void LanaiInstrInfo::loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator Position, Register DestinationRegister, int FrameIndex, const TargetRegisterClass *RegisterClass, - const TargetRegisterInfo * /*RegisterInfo*/) const { + const TargetRegisterInfo * /*RegisterInfo*/, Register /*VReg*/) const { DebugLoc DL; if (Position != MBB.end()) { DL = Position->getDebugLoc(); diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h index 5eef4474801d4..62f6240c6e468 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -51,19 +51,19 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { const DebugLoc &DL, MCRegister DestinationRegister, MCRegister SourceRegister, bool KillSource) const override; - void - storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator Position, - Register SourceRegister, bool IsKill, int FrameIndex, - const TargetRegisterClass *RegisterClass, - const TargetRegisterInfo *RegisterInfo) const override; - - void - loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator Position, - Register DestinationRegister, int FrameIndex, - const TargetRegisterClass *RegisterClass, - const TargetRegisterInfo *RegisterInfo) const override; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator Position, + Register SourceRegister, bool IsKill, int FrameIndex, + const TargetRegisterClass *RegisterClass, + const TargetRegisterInfo *RegisterInfo, + Register VReg) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator Position, + Register DestinationRegister, int FrameIndex, + const TargetRegisterClass *RegisterClass, + const TargetRegisterInfo *RegisterInfo, + Register VReg) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h index edf5f2ee087ec..81f4a122d022f 100644 --- a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h +++ b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h @@ -38,7 +38,7 @@ class LanaiMachineFunctionInfo : public MachineFunctionInfo { int VarArgsFrameIndex; public: - explicit LanaiMachineFunctionInfo(MachineFunction &MF) + LanaiMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) : VarArgsFrameIndex(0) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp index 8af40d18d1068..1a6df0af43591 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -13,6 +13,7 @@ #include "LanaiTargetMachine.h" #include "Lanai.h" +#include "LanaiMachineFunctionInfo.h" #include "LanaiTargetObjectFile.h" #include "LanaiTargetTransformInfo.h" #include "TargetInfo/LanaiTargetInfo.h" @@ -72,6 +73,13 @@ LanaiTargetMachine::getTargetTransformInfo(const Function &F) const { return 
TargetTransformInfo(LanaiTTIImpl(this, F)); } +MachineFunctionInfo *LanaiTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return LanaiMachineFunctionInfo::create(Allocator, + F, STI); +} + namespace { // Lanai Code Generator Pass Configuration Options. class LanaiPassConfig : public TargetPassConfig { diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/llvm/lib/Target/Lanai/LanaiTargetMachine.h index 258e58c86253d..f829b0437f735 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.h +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.h @@ -47,6 +47,10 @@ class LanaiTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool isMachineVerifierClean() const override { return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 53c0c4c9c419d..a6640274b3ede 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -54,7 +54,7 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void LoongArchInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -84,10 +84,12 @@ void LoongArchInstrInfo::storeRegToStackSlot( .addMemOperand(MMO); } -void LoongArchInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, - int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DstReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index af1070338b39d..e88cbb99921c1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -35,11 +35,13 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { MachineBasicBlock::iterator MBBI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; // Materializes the given integer Val into DstReg. 
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h index d4a6c884bc9d2..c17d72bcc911f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -33,7 +33,8 @@ class LoongArchMachineFunctionInfo : public MachineFunctionInfo { unsigned CalleeSavedStackSize = 0; public: - LoongArchMachineFunctionInfo(const MachineFunction &MF) {} + LoongArchMachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.h b/llvm/lib/Target/M68k/M68kFrameLowering.h index a5349377232eb..fd010a908942c 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.h +++ b/llvm/lib/Target/M68k/M68kFrameLowering.h @@ -110,7 +110,8 @@ class M68kFrameLowering : public TargetFrameLowering { bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; /// Issues instruction(s) to restore all callee saved registers and returns /// true if it isn't possible / profitable to do so by issuing a series of diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp index ca64dc191e5ca..6009c098f758e 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp +++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp @@ -739,12 +739,10 @@ bool M68kInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, return true; } -void M68kInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool IsKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void M68kInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) && "Stack slot is too small to store"); @@ -760,7 +758,8 @@ void M68kInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { const MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); assert(MFI.getObjectSize(FrameIndex) >= TRI->getSpillSize(*RC) && "Stack slot is too small to load"); diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h index 84d50c181ead9..b6057a39bc826 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.h +++ b/llvm/lib/Target/M68k/M68kInstrInfo.h @@ -281,12 +281,14 @@ class M68kInstrInfo : public M68kGenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register 
VReg) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 0646d6faebedd..7405716516643 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -33,11 +33,10 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI) : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), RI() {} -void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void MSP430InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -64,7 +63,8 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const{ + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index 710913b2d36f6..94cf9f8e1f164 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -40,16 +40,16 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIndex, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MI, Register DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h index 93b3882558774..74ffaebe429f5 100644 --- a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -40,8 +40,8 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo { public: MSP430MachineFunctionInfo() = default; - explicit MSP430MachineFunctionInfo(MachineFunction &MF) - : CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {} + MSP430MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) + : CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 6bba224aab8b2..48215e19888dc 100644 --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ 
-12,6 +12,7 @@ #include "MSP430TargetMachine.h" #include "MSP430.h" +#include "MSP430MachineFunctionInfo.h" #include "TargetInfo/MSP430TargetInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -71,6 +72,13 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { return new MSP430PassConfig(*this, PM); } +MachineFunctionInfo *MSP430TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return MSP430MachineFunctionInfo::create(Allocator, + F, STI); +} + bool MSP430PassConfig::addInstSelector() { // Install an instruction selector. addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel())); diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/llvm/lib/Target/MSP430/MSP430TargetMachine.h index ef757dc7cb78a..c7129a03f3100 100644 --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -41,6 +41,10 @@ class MSP430TargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; // MSP430TargetMachine. } // end namespace llvm diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index 8b98ad3dceea3..ad977a7656269 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -134,18 +134,19 @@ class MipsInstrInfo : public MipsGenInstrInfo { unsigned getInstSizeInBytes(const MachineInstr &MI) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override { + const TargetRegisterInfo *TRI, + Register VReg) const override { storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); } void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override { + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override { loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); } diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h b/llvm/lib/Target/Mips/MipsMachineFunction.h index 7b17fd3ed0cdb..b7b748838569f 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -24,7 +24,7 @@ namespace llvm { /// Mips target-specific information for each MachineFunction. 
class MipsFunctionInfo : public MachineFunctionInfo { public: - MipsFunctionInfo(MachineFunction &MF) {} + MipsFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 7729d9cf92dae..38f6889a52358 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -508,7 +508,8 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, - MipsFI->getEhDataRegFI(I), RC, &RegInfo); + MipsFI->getEhDataRegFI(I), RC, &RegInfo, + Register()); } // Emit .cfi_offset directives for eh data registers. @@ -726,7 +727,8 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, // Insert instructions that restore eh data registers. for (int J = 0; J < 4; ++J) { TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), - MipsFI->getEhDataRegFI(J), RC, &RegInfo); + MipsFI->getEhDataRegFI(J), RC, &RegInfo, + Register()); } } @@ -759,7 +761,7 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub( // Restore EPC STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(0), PtrRC, - STI.getRegisterInfo()); + STI.getRegisterInfo(), Register()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014) .addReg(Mips::K1) .addImm(0); @@ -767,7 +769,7 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub( // Restore Status STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, MipsFI->getISRRegFI(1), PtrRC, - STI.getRegisterInfo()); + STI.getRegisterInfo(), Register()); BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) .addReg(Mips::K1) .addImm(0); @@ -830,7 +832,8 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. 
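Alongside the spill hooks, the target-specific MachineFunctionInfo constructors in these hunks (MSP430 and Mips here, with NVPTX, PowerPC, RISCV, Sparc, SystemZ and VE below) switch from taking a `MachineFunction &` to taking the IR function and the subtarget, so the object no longer depends on an already-constructed MachineFunction. A minimal sketch of the new constructor shape; `MyFunctionInfo` is a placeholder name:

```cpp
#include "llvm/CodeGen/MachineFunction.h"      // declares MachineFunctionInfo
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"

namespace llvm {

// Placeholder MachineFunctionInfo subclass; real targets keep their existing
// members and only swap the constructor signature, as in the hunks above.
class MyFunctionInfo : public MachineFunctionInfo {
public:
  MyFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
};

} // end namespace llvm
```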
bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI, + Register()); } return true; diff --git a/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/llvm/lib/Target/Mips/MipsScheduleGeneric.td index 931412cb261ef..0d4a29201888a 100644 --- a/llvm/lib/Target/Mips/MipsScheduleGeneric.td +++ b/llvm/lib/Target/Mips/MipsScheduleGeneric.td @@ -51,6 +51,7 @@ def : InstRW<[GenericWriteALU], (instrs ADD, ADDi, ADDiu, ADDu, AND, ANDi, SRLV, SSNOP, SUB, SUBu, WSBH, XOR, XORi)>; def : InstRW<[GenericWriteALU], (instrs COPY)>; +def : InstRW<[GenericWriteALU], (instrs PRED_COPY)>; // MIPSR6 // ====== diff --git a/llvm/lib/Target/Mips/MipsScheduleP5600.td b/llvm/lib/Target/Mips/MipsScheduleP5600.td index 466b5c6af6963..340d7b8b30fe8 100644 --- a/llvm/lib/Target/Mips/MipsScheduleP5600.td +++ b/llvm/lib/Target/Mips/MipsScheduleP5600.td @@ -223,7 +223,7 @@ def P5600WriteEitherALU : SchedWriteVariant< // xori def : InstRW<[P5600WriteEitherALU], (instrs ADD, ADDi, ADDiu, ANDi, ORi, ROTR, SEB, SEH, SLT, SLTu, SLL, SRA, SRL, XORi, - ADDu, SLLV, SRAV, SRLV, LSA, COPY)>; + ADDu, SLLV, SRAV, SRLV, LSA, COPY, PRED_COPY)>; // FPU Pipelines // ============= diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp index fb0aa397d393c..aa6929cb2163e 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" #include "Mips16ISelDAGToDAG.h" +#include "MipsMachineFunction.h" #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" @@ -291,6 +292,12 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(MipsTTIImpl(this, F)); } +MachineFunctionInfo *MipsTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return MipsFunctionInfo::create(Allocator, F, STI); +} + // Implemented by targets that want to run passes immediately before // machine code is emitted. void MipsPassConfig::addPreEmitPass() { diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.h b/llvm/lib/Target/Mips/MipsTargetMachine.h index 46ffc11738df8..790fef59ab9e0 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -63,6 +63,10 @@ class MipsTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. 
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Mips doesn't have any special address spaces so we just reserve diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index 40f6adc964d7e..cd068a0939300 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -41,11 +41,14 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { * virtual void storeRegToStackSlot(MachineBasicBlock &MBB, * MachineBasicBlock::iterator MBBI, * unsigned SrcReg, bool isKill, int FrameIndex, - * const TargetRegisterClass *RC) const; + * const TargetRegisterClass *RC, + * Register VReg) const; * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, * MachineBasicBlock::iterator MBBI, * unsigned DestReg, int FrameIndex, - * const TargetRegisterClass *RC) const; + * const TargetRegisterClass *RC, + * const TargetRegisterInfo *TRI, + * Register VReg) const; */ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 0a7b9cf468a65..77426f7f6da71 100644 --- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -24,7 +24,7 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo { SmallVector ImageHandleList; public: - NVPTXMachineFunctionInfo(MachineFunction &MF) {} + NVPTXMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 597b8af176a2a..c52dec1ab8b01 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -15,6 +15,7 @@ #include "NVPTXAllocaHoisting.h" #include "NVPTXAtomicLower.h" #include "NVPTXLowerAggrCopies.h" +#include "NVPTXMachineFunctionInfo.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" #include "TargetInfo/NVPTXTargetInfo.h" @@ -210,6 +211,13 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } +MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return NVPTXMachineFunctionInfo::create(Allocator, + F, STI); +} + void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &PM, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index 491e721479d33..2474bd054d087 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -63,6 +63,10 @@ class NVPTXTargetMachine : public LLVMTargetMachine { } void adjustPassManager(PassManagerBuilder &) override; + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + void registerPassBuilderCallbacks(PassBuilder &PB) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index 232853635e0ae..0627a5350197a 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -1011,7 
+1011,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read], NAND8_rec, NAND_rec, NOR, NOR8, NOR8_rec, NOR_rec, - COPY, OR, OR8, + COPY, PRED_COPY, OR, OR8, OR8_rec, OR_rec, ORC, ORC8, ORC8_rec, ORC_rec, diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index ac20dd353c849..fba5fa7395e6f 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -153,6 +153,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "ADDI(S)?toc(HA|L)(8)?$"), (instregex "LA(8)?$"), COPY, + PRED_COPY, MCRF, MCRXRX, XSNABSDP, diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 4138ea36856b3..45f7312a64679 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2475,8 +2475,8 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC, TRI); else - TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), - RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC, + TRI, Register()); } } } @@ -2648,7 +2648,8 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters( TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); else - TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI, + Register()); assert(I != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 42685e7c489aa..abea5e97d8c26 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1973,12 +1973,10 @@ void PPCInstrInfo::storeRegToStackSlotNoUpd( NewMIs.back()->addMemOperand(MF, MMO); } -void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void PPCInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register // using a VSX instruction. The issue with this is that the VSX @@ -2037,7 +2035,8 @@ void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { // We need to avoid a situation in which the value from a VRRC register is // spilled using an Altivec instruction and reloaded into a VSRC register // using a VSX instruction. 
The issue with this is that the VSX diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 26e9d0e52d573..e12656d4c550a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -542,10 +542,11 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; // Emits a register spill without updating the register class for vector // registers. This ensures that when we spill a vector register the @@ -557,10 +558,10 @@ class PPCInstrInfo : public PPCGenInstrInfo { const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; // Emits a register reload without updating the register class for vector // registers. This ensures that when we reload a vector register the diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 9d6dfd16ff9d3..b2089521c4674 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -20,7 +20,8 @@ static cl::opt PPCDisableNonVolatileCR( cl::init(false), cl::Hidden); void PPCFunctionInfo::anchor() {} -PPCFunctionInfo::PPCFunctionInfo(const MachineFunction &MF) +PPCFunctionInfo::PPCFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) : DisableNonVolatileCR(PPCDisableNonVolatileCR) {} MachineFunctionInfo * diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index b918e723de000..df655a3be9512 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -151,7 +151,7 @@ class PPCFunctionInfo : public MachineFunctionInfo { std::vector> LiveInAttrs; public: - explicit PPCFunctionInfo(const MachineFunction &MF); + explicit PPCFunctionInfo(const Function &F, const TargetSubtargetInfo *STI); MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index fe396cbfc011d..8c859e67e6e4b 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -13,6 +13,7 @@ #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" +#include "PPCMachineFunctionInfo.h" #include "PPCMachineScheduler.h" #include "PPCMacroFusion.h" #include "PPCSubtarget.h" @@ -587,6 +588,12 @@ bool PPCTargetMachine::isLittleEndian() const { return Endianness == Endian::LITTLE; } +MachineFunctionInfo *PPCTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return PPCFunctionInfo::create(Allocator, F, STI); +} + static MachineSchedRegistry 
PPCPreRASchedRegistry("ppc-prera", "Run PowerPC PreRA specific scheduler", diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h index bafb79c849429..6071eb2dec6b4 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -56,6 +56,11 @@ class PPCTargetMachine final : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; } bool isPPC64() const { const Triple &TT = getTargetTriple(); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b7aefe12a9cb3..69abdd67adae3 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1228,7 +1228,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI); + RC, TRI, Register()); } return true; @@ -1256,7 +1256,8 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( for (auto &CS : NonLibcallCSI) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, + Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index cdc1f8f0a7c24..a9314390f8064 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10395,7 +10395,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, int FI = MF.getInfo()->getMoveF64FrameIndex(MF); TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, - RI); + RI, Register()); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMOLo = MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); @@ -10443,7 +10443,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, .addFrameIndex(FI) .addImm(4) .addMemOperand(MMOHi); - TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); + TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); MI.eraseFromParent(); // The pseudo instruction is gone now. 
return BB; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index c05527cff95d9..28e3487174f5b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -453,7 +453,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -547,7 +548,8 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -998,14 +1000,14 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, report_fatal_error("underestimated function size"); storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex, - &RISCV::GPRRegClass, TRI); + &RISCV::GPRRegClass, TRI, Register()); TRI->eliminateFrameIndex(std::prev(MI.getIterator()), /*SpAdj=*/0, /*FIOperandNum=*/1); MI.getOperand(1).setMBB(&RestoreBB); loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, - &RISCV::GPRRegClass, TRI); + &RISCV::GPRRegClass, TRI, Register()); TRI->eliminateFrameIndex(RestoreBB.back(), /*SpAdj=*/0, /*FIOperandNum=*/1); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index fe95bb2fc4d13..e12a254dcf5ed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -62,12 +62,14 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { MachineBasicBlock::iterator MBBI, Register SrcReg, bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; using TargetInstrInfo::foldMemoryOperandImpl; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index 8b5326de2ad3c..732a3f5b7db85 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -70,7 +70,7 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { SmallVector SExt32Registers; public: - RISCVMachineFunctionInfo(const MachineFunction &MF) {} + RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index e39585ff08412..f9a5f7d968d2e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -175,6 +175,7 @@ def : WriteRes; def : WriteRes; def : InstRW<[WriteIALU], (instrs COPY)>; +def : InstRW<[WriteIALU], (instrs PRED_COPY)>; //===----------------------------------------------------------------------===// // Bypass and advance diff --git 
a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 17df9e212eb81..bd52513a653cd 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -168,6 +168,7 @@ def : WriteRes; def : WriteRes; def : InstRW<[WriteIALU], (instrs COPY)>; +def : InstRW<[WriteIALU], (instrs PRED_COPY)>; //===----------------------------------------------------------------------===// // Bypass and advance diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 2f5ea98c7d4d0..7c89f1273568b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -130,6 +130,13 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return RISCVMachineFunctionInfo::create(Allocator, + F, STI); +} + TargetTransformInfo RISCVTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(RISCVTTIImpl(this, F)); diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h index 4b2a403c5c5b5..ee760cc7f85c9 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h @@ -42,6 +42,10 @@ class RISCVTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 595670735c74b..4df8dd328f89d 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -391,11 +391,12 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MovMI->addRegisterKilled(SrcReg, TRI); } -void SparcInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SparcInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -430,11 +431,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Can't store this register to stack slot"); } -void SparcInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - Register DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SparcInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h index b25de8e5a6909..39cf791c21730 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.h +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h @@ 
-85,16 +85,17 @@ class SparcInstrInfo : public SparcGenInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; Register getGlobalBaseReg(MachineFunction *MF) const; diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h index e1a1568d28a2c..f73cd6d1b1317 100644 --- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -34,9 +34,9 @@ namespace llvm { SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), IsLeafProc(false) {} - explicit SparcMachineFunctionInfo(MachineFunction &MF) - : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), - IsLeafProc(false) {} + SparcMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index 8bd51a703d47f..1d0b7e5ba1d40 100644 --- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -12,6 +12,7 @@ #include "SparcTargetMachine.h" #include "LeonPasses.h" #include "Sparc.h" +#include "SparcMachineFunctionInfo.h" #include "SparcTargetObjectFile.h" #include "TargetInfo/SparcTargetInfo.h" #include "llvm/CodeGen/Passes.h" @@ -134,6 +135,13 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +MachineFunctionInfo *SparcTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return SparcMachineFunctionInfo::create(Allocator, + F, STI); +} + namespace { /// Sparc Code Generator Pass Configuration Options. 
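The new constructors are reached through a factory hook on the target machine: each *TargetMachine in these hunks (Hexagon, Lanai, MSP430, Mips, NVPTX, PowerPC, RISCV and Sparc above, SystemZ and VE below) declares and defines a `createMachineFunctionInfo` override taking the allocator, IR function and subtarget. A sketch of the pattern with placeholder names; the templated `create<>` call is an assumption about the MachineFunctionInfo allocation helper backing the overrides shown here:

```cpp
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"

namespace llvm {

// Placeholder MachineFunctionInfo, as in the earlier sketch.
class MyFunctionInfo : public MachineFunctionInfo {
public:
  MyFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
};

// Shape of a typical createMachineFunctionInfo override, written as a free
// function so the sketch stands alone; in the patch it is a const member of
// the corresponding target machine class.
MachineFunctionInfo *
createMyMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
                            const TargetSubtargetInfo *STI) {
  // Assumed helper: allocates the info object in the per-function allocator
  // and forwards F and STI to the constructor above.
  return MyFunctionInfo::create<MyFunctionInfo>(Allocator, F, STI);
}

} // end namespace llvm
```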
class SparcPassConfig : public TargetPassConfig { diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.h b/llvm/lib/Target/Sparc/SparcTargetMachine.h index 4083f61433b16..b78929a6f4803 100644 --- a/llvm/lib/Target/Sparc/SparcTargetMachine.h +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.h @@ -39,6 +39,10 @@ class SparcTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; /// Sparc 32-bit target machine diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 6e2797d725e04..d7a2a51d4652f 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -370,12 +370,12 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI); + &SystemZ::FP64BitRegClass, TRI, Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI); + &SystemZ::VR128BitRegClass, TRI, Register()); } } @@ -399,10 +399,10 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( Register Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI); + &SystemZ::FP64BitRegClass, TRI, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI); + &SystemZ::VR128BitRegClass, TRI, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at @@ -1113,12 +1113,12 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI); + &SystemZ::FP64BitRegClass, TRI, Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI); + &SystemZ::VR128BitRegClass, TRI, Register()); } } @@ -1145,10 +1145,10 @@ bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters( Register Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI); + &SystemZ::FP64BitRegClass, TRI, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI); + &SystemZ::VR128BitRegClass, TRI, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2bb49091e2c15..ff82a63214cd6 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -869,7 +869,7 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void SystemZInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const 
TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, Register VReg) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -881,10 +881,12 @@ void SystemZInstrInfo::storeRegToStackSlot( FrameIdx); } -void SystemZInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 0525f58277365..9ce75db6c177c 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -262,15 +262,16 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MBBI, Register DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override; MachineInstr * diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index de73a5d864224..333195989a117 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -37,9 +37,9 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned NumLocalDynamics; public: - explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), - RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {} + SystemZMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) + : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), + RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index fd01a8a941c9d..1f3eff0fefbc1 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -189,6 +189,7 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "PRED_COPY$")>; def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index 3f406736a71ff..1016f24b93dcf 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -190,6 +190,7 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "PRED_COPY$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td index 6ae911c3f3ebe..0727f4402a468 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -191,6 +191,7 @@ def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "PRED_COPY$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td index ca688671a7e2e..eb664d887df3e 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td @@ -191,6 +191,7 @@ def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "PRED_COPY$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 173cf960d2bd0..17c2f951482fd 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -168,6 +168,7 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXU, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "PRED_COPY$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index d2060471d65ed..a1794e3523aa7 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -173,6 +173,7 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[WLat1, FXU, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "PRED_COPY$")>; def : 
InstRW<[WLat1, FXU, NormalGr], (instregex "EXTRACT_SUBREG$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "INSERT_SUBREG$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "REG_SEQUENCE$")>; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 8c1be7d4949da..ca5645aa175bb 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -9,6 +9,7 @@ #include "SystemZTargetMachine.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" +#include "SystemZMachineFunctionInfo.h" #include "SystemZMachineScheduler.h" #include "SystemZTargetTransformInfo.h" #include "TargetInfo/SystemZTargetInfo.h" @@ -310,3 +311,10 @@ TargetTransformInfo SystemZTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(SystemZTTIImpl(this, F)); } + +MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return SystemZMachineFunctionInfo::create( + Allocator, F, STI); +} diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index 2cdb33a5064b8..cb39983e3f38b 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -50,6 +50,10 @@ class SystemZTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool targetSchedulesPostRAScheduling() const override { return true; }; }; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index 46bb85606a629..ebb9e21389c37 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -460,7 +460,8 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -522,7 +523,8 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h index ed1f491821501..4fe56f24116f8 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -92,12 +92,14 @@ class VEInstrInfo : public VEGenInstrInfo { MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; /// } Stack Spill & Reload /// Optimization { diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h index d9d30ad5b8c54..5c02315d51759 100644 --- 
a/llvm/lib/Target/VE/VEMachineFunctionInfo.h +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -30,7 +30,7 @@ class VEMachineFunctionInfo : public MachineFunctionInfo { public: VEMachineFunctionInfo() : VarArgsFrameOffset(0), IsLeafProc(false) {} - explicit VEMachineFunctionInfo(MachineFunction &MF) + VEMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) : VarArgsFrameOffset(0), IsLeafProc(false) {} MachineFunctionInfo * diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp index d7c1457fb0a81..22ffb9506b8cb 100644 --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -12,6 +12,7 @@ #include "VETargetMachine.h" #include "TargetInfo/VETargetInfo.h" #include "VE.h" +#include "VEMachineFunctionInfo.h" #include "VETargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -97,6 +98,13 @@ VETargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(VETTIImpl(this, F)); } +MachineFunctionInfo *VETargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return VEMachineFunctionInfo::create(Allocator, F, + STI); +} + namespace { /// VE Code Generator Pass Configuration Options. class VEPassConfig : public TargetPassConfig { diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h index 9cf194444aa53..c08ab938c4125 100644 --- a/llvm/lib/Target/VE/VETargetMachine.h +++ b/llvm/lib/Target/VE/VETargetMachine.h @@ -47,6 +47,10 @@ class VETargetMachine : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool isMachineVerifierClean() const override { return false; } TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 96284687971c0..7207fbeb305ad 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -28,10 +28,9 @@ MachineFunctionInfo *WebAssemblyFunctionInfo::clone( BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) const { - WebAssemblyFunctionInfo *Clone = - DestMF.cloneInfo(*this); - Clone->MF = &DestMF; - return Clone; + // TODO: Implement cloning for WasmEHFuncInfo. This will have invalid block + // references. + return DestMF.cloneInfo(*this); } void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) { @@ -122,11 +121,8 @@ llvm::signatureFromMVTs(const SmallVectorImpl &Results, } yaml::WebAssemblyFunctionInfo::WebAssemblyFunctionInfo( - const llvm::WebAssemblyFunctionInfo &MFI) + const llvm::MachineFunction &MF, const llvm::WebAssemblyFunctionInfo &MFI) : CFGStackified(MFI.isCFGStackified()) { - auto *EHInfo = MFI.getWasmEHFuncInfo(); - const llvm::MachineFunction &MF = MFI.getMachineFunction(); - for (auto VT : MFI.getParams()) Params.push_back(EVT(VT).getEVTString()); for (auto VT : MFI.getResults()) @@ -134,7 +130,8 @@ yaml::WebAssemblyFunctionInfo::WebAssemblyFunctionInfo( // MFI.getWasmEHFuncInfo() is non-null only for functions with the // personality function. 
- if (EHInfo) { + + if (auto *EHInfo = MF.getWasmEHFuncInfo()) { // SrcToUnwindDest can contain stale mappings in case BBs are removed in // optimizations, in case, for example, they are unreachable. We should not // include their info. @@ -155,15 +152,19 @@ void yaml::WebAssemblyFunctionInfo::mappingImpl(yaml::IO &YamlIO) { } void WebAssemblyFunctionInfo::initializeBaseYamlFields( - const yaml::WebAssemblyFunctionInfo &YamlMFI) { + MachineFunction &MF, const yaml::WebAssemblyFunctionInfo &YamlMFI) { CFGStackified = YamlMFI.CFGStackified; for (auto VT : YamlMFI.Params) addParam(WebAssembly::parseMVT(VT.Value)); for (auto VT : YamlMFI.Results) addResult(WebAssembly::parseMVT(VT.Value)); - if (WasmEHInfo) { + + // FIXME: WasmEHInfo is defined in the MachineFunction, but serialized + // here. Either WasmEHInfo should be moved out of MachineFunction, or the + // serialization handling should be moved to MachineFunction. + if (WasmEHFuncInfo *WasmEHInfo = MF.getWasmEHFuncInfo()) { for (auto KV : YamlMFI.SrcToUnwindDest) - WasmEHInfo->setUnwindDest(MF->getBlockNumbered(KV.first), - MF->getBlockNumbered(KV.second)); + WasmEHInfo->setUnwindDest(MF.getBlockNumbered(KV.first), + MF.getBlockNumbered(KV.second)); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 619617049bb21..7622164449a53 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -31,8 +31,6 @@ struct WebAssemblyFunctionInfo; /// This class is derived from MachineFunctionInfo and contains private /// WebAssembly-specific information for each MachineFunction. class WebAssemblyFunctionInfo final : public MachineFunctionInfo { - const MachineFunction *MF; - std::vector Params; std::vector Results; std::vector Locals; @@ -66,12 +64,9 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { // Function properties. bool CFGStackified = false; - // Catchpad unwind destination info for wasm EH. 
- WasmEHFuncInfo *WasmEHInfo = nullptr; - public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF_) - : MF(&MF_), WasmEHInfo(MF_.getWasmEHFuncInfo()) {} + explicit WebAssemblyFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} ~WebAssemblyFunctionInfo() override; MachineFunctionInfo * @@ -79,9 +74,8 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { const DenseMap &Src2DstMBB) const override; - const MachineFunction &getMachineFunction() const { return *MF; } - - void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI); + void initializeBaseYamlFields(MachineFunction &MF, + const yaml::WebAssemblyFunctionInfo &YamlMFI); void addParam(MVT VT) { Params.push_back(VT); } const std::vector &getParams() const { return Params; } @@ -166,9 +160,6 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { bool isCFGStackified() const { return CFGStackified; } void setCFGStackified(bool Value = true) { CFGStackified = Value; } - - WasmEHFuncInfo *getWasmEHFuncInfo() const { return WasmEHInfo; } - void setWasmEHFuncInfo(WasmEHFuncInfo *Info) { WasmEHInfo = Info; } }; void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, @@ -205,7 +196,8 @@ struct WebAssemblyFunctionInfo final : public yaml::MachineFunctionInfo { BBNumberMap SrcToUnwindDest; WebAssemblyFunctionInfo() = default; - WebAssemblyFunctionInfo(const llvm::WebAssemblyFunctionInfo &MFI); + WebAssemblyFunctionInfo(const llvm::MachineFunction &MF, + const llvm::WebAssemblyFunctionInfo &MFI); void mappingImpl(yaml::IO &YamlIO) override; ~WebAssemblyFunctionInfo() = default; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 76f036358ae89..f8ad0909500d7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -336,6 +336,13 @@ class WebAssemblyPassConfig final : public TargetPassConfig { }; } // end anonymous namespace +MachineFunctionInfo *WebAssemblyTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return WebAssemblyFunctionInfo::create(Allocator, F, + STI); +} + TargetTransformInfo WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); @@ -585,7 +592,7 @@ WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const { yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( const MachineFunction &MF) const { const auto *MFI = MF.getInfo(); - return new yaml::WebAssemblyFunctionInfo(*MFI); + return new yaml::WebAssemblyFunctionInfo(MF, *MFI); } bool WebAssemblyTargetMachine::parseMachineFunctionInfo( @@ -593,6 +600,6 @@ bool WebAssemblyTargetMachine::parseMachineFunctionInfo( SMDiagnostic &Error, SMRange &SourceRange) const { const auto &YamlMFI = static_cast(MFI); MachineFunction &MF = PFS.MF; - MF.getInfo()->initializeBaseYamlFields(YamlMFI); + MF.getInfo()->initializeBaseYamlFields(MF, YamlMFI); return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 5d5378f765677..77b6312c0f248 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -46,6 +46,10 @@ class WebAssemblyTargetMachine final : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + 
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; bool usesPhysRegsForValues() const override { return false; } diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp index 7e5540022cc8e..69b2adcd5f9e8 100644 --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -208,7 +208,8 @@ void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before, const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); // Don't need shape information for tile store, becasue it is adjacent to // the tile def instruction. - TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI); + TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI, + Register()); ++NumStores; // TODO: update DBG_VALUEs diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index b6fa3d5d4e543..469174552d3ae 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2741,7 +2741,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters( MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); - TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI, + Register()); --MI; MI->setFlag(MachineInstr::FrameSetup); ++MI; @@ -2817,7 +2818,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters( VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); - TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI, + Register()); } // POP GPRs. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7d4d775fb24b0..be6696e5c1be5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3,6 +3,8 @@ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Modifications Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Notified per clause 4(b) of the license. 
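Note on the recurring signature change above: the spill and reload hooks in this patch (SystemZ, VE, X86, XCore, and the frame-lowering callers) gain a trailing Register VReg parameter, and call sites that have no originating virtual register simply pass a default-constructed Register(). The following is a self-contained sketch of that interface shape using stand-in types; it is not the actual LLVM TargetInstrInfo API.

#include <iostream>

// Stand-in for llvm::Register: value 0 means "no virtual register".
class Register {
  unsigned Reg = 0;
public:
  Register() = default;
  explicit Register(unsigned R) : Reg(R) {}
  bool isValid() const { return Reg != 0; }
};

// Stand-in for the TargetInstrInfo hook: the trailing VReg parameter is the
// new addition; everything else keeps its old meaning.
struct FakeTargetInstrInfo {
  virtual ~FakeTargetInstrInfo() = default;
  virtual void storeRegToStackSlot(unsigned SrcReg, bool IsKill, int FrameIdx,
                                   Register VReg) const = 0;
};

struct MyTargetInstrInfo : FakeTargetInstrInfo {
  // An out-of-tree target would add the trailing parameter to keep its
  // override valid after this change.
  void storeRegToStackSlot(unsigned SrcReg, bool IsKill, int FrameIdx,
                           Register VReg) const override {
    std::cout << "spill r" << SrcReg << (IsKill ? " (kill)" : "")
              << " to FI#" << FrameIdx
              << (VReg.isValid() ? " (from vreg)" : " (no vreg)") << '\n';
  }
};

int main() {
  MyTargetInstrInfo TII;
  // Frame-lowering style caller: spills a physical callee-saved register,
  // so there is no virtual register to report.
  TII.storeRegToStackSlot(/*SrcReg=*/5, /*IsKill=*/true, /*FrameIdx=*/0,
                          Register());
  // Register-allocator style caller: the spilled value came from a vreg.
  TII.storeRegToStackSlot(/*SrcReg=*/7, /*IsKill=*/false, /*FrameIdx=*/1,
                          Register(7));
}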
// //===----------------------------------------------------------------------===// // diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 381fb117c7e49..0dab0e445417d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3928,12 +3928,10 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB, } } -void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void X86InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && @@ -3965,7 +3963,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + Register VReg) const { const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 3a185cdd63a27..287b132d79346 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -367,12 +367,14 @@ class X86InstrInfo final : public X86GenInstrInfo { MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Opc, Register Reg, int FrameIdx, diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 99cc9f525b2cb..8d946c15b78cc 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -132,9 +132,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { public: X86MachineFunctionInfo() = default; + X86MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} - explicit X86MachineFunctionInfo(MachineFunction &MF) {} - explicit X86MachineFunctionInfo(const X86MachineFunctionInfo &) = default; + X86MachineFunctionInfo(const X86MachineFunctionInfo &) = default; MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index e4b95cb0807f2..401159293896b 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -122,6 +122,7 @@ defm : X86WriteRes; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; // Idioms that clear a register, like xorps %xmm0, %xmm0. 
// These can often bypass execution ports completely. diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 49858ca0a8001..19a24227ac648 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -114,6 +114,7 @@ def : WriteRes; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 8ae8e574f87a3..764f8a4aec426 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -176,6 +176,7 @@ defm : X86WriteResUnsupported; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; //////////////////////////////////////////////////////////////////////////////// // Idioms that clear a register, like xorps %xmm0, %xmm0. diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 61424208a4f56..697bbf2b66fae 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -278,6 +278,7 @@ def : WriteRes { let NumMicroOps = 2; let ResourceCycle // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; //////////////////////////////////////////////////////////////////////////////// // Idioms that clear a register, like xorps %xmm0, %xmm0. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 2c8759e2d1555..dd3698b6abe97 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -97,6 +97,7 @@ def : WriteRes { let Latency = 3; } // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 43f93773c320a..f26cfbd704f48 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -228,6 +228,7 @@ defm : ZnWriteResPair; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; // BMI1 BEXTR, BMI2 BZHI defm : ZnWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index b4f72a968959b..3c67c5ac202b7 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -227,6 +227,7 @@ defm : Zn2WriteResPair; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; // BMI1 BEXTR, BMI2 BZHI defm : Zn2WriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td index 02f7f8376fdb7..73234811e7993 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver3.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -512,6 +512,7 @@ defm : Zn3WriteResInt; // Treat misc copies as a move. 
def : InstRW<[WriteMove], (instrs COPY)>; +def : InstRW<[WriteMove], (instrs PRED_COPY)>; def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> { let Latency = Znver3Model.LoadLatency; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 47bad07e122b7..573d5039f27ac 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" +#include "X86MachineFunctionInfo.h" #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetObjectFile.h" @@ -425,6 +426,13 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(*this, PM); } +MachineFunctionInfo *X86TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return X86MachineFunctionInfo::create(Allocator, F, + STI); +} + void X86PassConfig::addIRPasses() { addPass(createAtomicExpandPass()); diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index 70df8da776413..8b401e70e8413 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -53,6 +53,10 @@ class X86TargetMachine final : public LLVMTargetMachine { return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool isJIT() const { return IsJIT; } bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 2fb06e29bf3b5..8cb9413f96526 100644 --- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -435,7 +435,8 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters( // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI, + Register()); if (emitFrameMoves) { auto Store = MI; --Store; @@ -460,7 +461,8 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters( "LR & FP are always handled in emitEpilogue"); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSR.getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSR.getFrameIdx(), RC, TRI, + Register()); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. 
loadRegFromStackSlot can insert multiple diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index 1b53d593c1309..46e46d12f0d47 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -355,13 +355,10 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Impossible reg-to-reg copy"); } -void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - Register SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ +void XCoreInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { DebugLoc DL; if (I != MBB.end() && !I->isDebugInstr()) DL = I->getDebugLoc(); @@ -382,8 +379,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ + const TargetRegisterInfo *TRI, + Register VReg) const { DebugLoc DL; if (I != MBB.end() && !I->isDebugInstr()) DL = I->getDebugLoc(); diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.h b/llvm/lib/Target/XCore/XCoreInstrInfo.h index 1fbb293bde602..9bf7e2dcccb7d 100644 --- a/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -67,16 +67,17 @@ class XCoreInstrInfo : public XCoreGenInstrInfo { bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + Register VReg) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; + MachineBasicBlock::iterator MI, Register DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const override; bool reverseBranchCondition( SmallVectorImpl &Cond) const override; diff --git a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h index 6cdb1239750a2..a89b4f786eb6e 100644 --- a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -43,7 +43,8 @@ class XCoreFunctionInfo : public MachineFunctionInfo { public: XCoreFunctionInfo() = default; - explicit XCoreFunctionInfo(MachineFunction &MF) {} + explicit XCoreFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 3c27fcd9ba535..1bbb4c41e3e44 100644 --- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/XCoreMCTargetDesc.h" #include "TargetInfo/XCoreTargetInfo.h" #include "XCore.h" +#include "XCoreMachineFunctionInfo.h" #include "XCoreTargetObjectFile.h" #include "XCoreTargetTransformInfo.h" #include "llvm/ADT/Optional.h" @@ -111,3 +112,9 @@ 
TargetTransformInfo XCoreTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(XCoreTTIImpl(this, F)); } + +MachineFunctionInfo *XCoreTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return XCoreFunctionInfo::create(Allocator, F, STI); +} diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.h b/llvm/lib/Target/XCore/XCoreTargetMachine.h index a4754fd77e656..51fdbb6d874c2 100644 --- a/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -47,6 +47,10 @@ class XCoreTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index f7024f441a08c..eea32be667988 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -224,7 +224,7 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty, const TargetLibraryInfo *TLI, const DataLayout &DL, - AA::OffsetAndSize *OASPtr) { + AA::RangeTy *RangePtr) { if (isa(Obj)) return UndefValue::get(&Ty); if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty)) @@ -237,8 +237,8 @@ Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty, if (!GV->hasInitializer()) return UndefValue::get(&Ty); - if (OASPtr && !OASPtr->offsetOrSizeAreUnknown()) { - APInt Offset = APInt(64, OASPtr->Offset); + if (RangePtr && !RangePtr->offsetOrSizeAreUnknown()) { + APInt Offset = APInt(64, RangePtr->Offset); return ConstantFoldLoadFromConst(GV->getInitializer(), &Ty, Offset, DL); } @@ -401,7 +401,7 @@ static bool getPotentialCopiesOfMemoryValue( }; auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) + if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead())) return true; if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; @@ -452,11 +452,11 @@ static bool getPotentialCopiesOfMemoryValue( // object. 
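The Attributor changes that follow rename AA::OffsetAndSize to AA::RangeTy and pass the range by reference through forallInterferingAccesses. As a rough, self-contained model of the semantics the surrounding code relies on (sentinel values for unknown/unassigned ranges and a conservative overlap test), consider the sketch below; the sentinel values and exact member set are assumptions for illustration, not the real llvm::AA::RangeTy definition.

#include <cstdint>
#include <iostream>

struct RangeTy {
  // Sentinels assumed for illustration only.
  static constexpr int64_t Unassigned = -2;
  static constexpr int64_t Unknown = -1;

  int64_t Offset = Unassigned;
  int64_t Size = Unassigned;

  static RangeTy getUnknown() { return RangeTy{Unknown, Unknown}; }

  bool isUnassigned() const { return Offset == Unassigned; }
  bool offsetOrSizeAreUnknown() const {
    return Offset == Unknown || Size == Unknown;
  }

  // Conservative: a range with an unknown offset or size may overlap anything.
  bool mayOverlap(const RangeTy &R) const {
    if (offsetOrSizeAreUnknown() || R.offsetOrSizeAreUnknown())
      return true;
    return Offset + Size > R.Offset && R.Offset + R.Size > Offset;
  }
};

int main() {
  RangeTy A{0, 8}, B{8, 4};
  std::cout << A.mayOverlap(B) << '\n';                     // 0: disjoint
  std::cout << A.mayOverlap(RangeTy::getUnknown()) << '\n'; // 1: conservative
}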
bool HasBeenWrittenTo = false; - AA::OffsetAndSize OAS; + AA::RangeTy Range; auto &PI = A.getAAFor(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, - HasBeenWrittenTo, &OAS)) { + HasBeenWrittenTo, Range)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " @@ -464,10 +464,10 @@ static bool getPotentialCopiesOfMemoryValue( return false; } - if (IsLoad && !HasBeenWrittenTo && !OAS.isUnassigned()) { + if (IsLoad && !HasBeenWrittenTo && !Range.isUnassigned()) { const DataLayout &DL = A.getDataLayout(); Value *InitialValue = - AA::getInitialValueForObj(*Obj, *I.getType(), TLI, DL, &OAS); + AA::getInitialValueForObj(*Obj, *I.getType(), TLI, DL, &Range); if (!InitialValue) { LLVM_DEBUG(dbgs() << "Could not determine required initial value of " "underlying object, abort!\n"); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5a4f3a6d6a5d9..7eaf75f249e4e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -721,25 +721,25 @@ struct DenseMapInfo : DenseMapInfo { static bool isEqual(const Access &LHS, const Access &RHS); }; -/// Helper that allows OffsetAndSize as a key in a DenseMap. -template <> struct DenseMapInfo { - static inline AA::OffsetAndSize getEmptyKey() { +/// Helper that allows RangeTy as a key in a DenseMap. +template <> struct DenseMapInfo { + static inline AA::RangeTy getEmptyKey() { auto EmptyKey = DenseMapInfo::getEmptyKey(); - return AA::OffsetAndSize{EmptyKey, EmptyKey}; + return AA::RangeTy{EmptyKey, EmptyKey}; } - static inline AA::OffsetAndSize getTombstoneKey() { + static inline AA::RangeTy getTombstoneKey() { auto TombstoneKey = DenseMapInfo::getTombstoneKey(); - return AA::OffsetAndSize{TombstoneKey, TombstoneKey}; + return AA::RangeTy{TombstoneKey, TombstoneKey}; } - static unsigned getHashValue(const AA::OffsetAndSize &OAS) { + static unsigned getHashValue(const AA::RangeTy &Range) { return detail::combineHashValue( - DenseMapInfo::getHashValue(OAS.Offset), - DenseMapInfo::getHashValue(OAS.Size)); + DenseMapInfo::getHashValue(Range.Offset), + DenseMapInfo::getHashValue(Range.Size)); } - static bool isEqual(const AA::OffsetAndSize &A, const AA::OffsetAndSize B) { + static bool isEqual(const AA::RangeTy &A, const AA::RangeTy B) { return A == B; } }; @@ -759,13 +759,6 @@ struct AccessAsInstructionInfo : DenseMapInfo { /// A type to track pointer/struct usage and accesses for AAPointerInfo. struct AA::PointerInfo::State : public AbstractState { - - ~State() { - // We do not delete the Accesses objects but need to destroy them still. - for (auto &It : AccessBins) - It.second->~Accesses(); - } - /// Return the best possible representable state. 
static State getBestState(const State &SIS) { return State(); } @@ -777,9 +770,7 @@ struct AA::PointerInfo::State : public AbstractState { } State() = default; - State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { - SIS.AccessBins.clear(); - } + State(State &&SIS) = default; const State &getAssumed() const { return *this; } @@ -805,7 +796,9 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; BS = R.BS; - AccessBins = R.AccessBins; + AccessList = R.AccessList; + OffsetBins = R.OffsetBins; + RemoteIMap = R.RemoteIMap; return *this; } @@ -813,114 +806,69 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; std::swap(BS, R.BS); - std::swap(AccessBins, R.AccessBins); + std::swap(AccessList, R.AccessList); + std::swap(OffsetBins, R.OffsetBins); + std::swap(RemoteIMap, R.RemoteIMap); return *this; } - bool operator==(const State &R) const { - if (BS != R.BS) - return false; - if (AccessBins.size() != R.AccessBins.size()) - return false; - auto It = begin(), RIt = R.begin(), E = end(); - while (It != E) { - if (It->getFirst() != RIt->getFirst()) - return false; - auto &Accs = It->getSecond(); - auto &RAccs = RIt->getSecond(); - if (Accs->size() != RAccs->size()) - return false; - for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) - if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) - return false; - ++It; - ++RIt; - } - return true; - } - bool operator!=(const State &R) const { return !(*this == R); } - - /// We store accesses in a set with the instruction as key. - struct Accesses { - SmallVector Accesses; - DenseMap Map; - - unsigned size() const { return Accesses.size(); } - - using vec_iterator = decltype(Accesses)::iterator; - vec_iterator begin() { return Accesses.begin(); } - vec_iterator end() { return Accesses.end(); } - - using iterator = decltype(Map)::const_iterator; - iterator find(AAPointerInfo::Access &Acc) { - return Map.find(Acc.getRemoteInst()); - } - iterator find_end() { return Map.end(); } - - AAPointerInfo::Access &get(iterator &It) { - return Accesses[It->getSecond()]; - } - - void insert(AAPointerInfo::Access &Acc) { - Map[Acc.getRemoteInst()] = Accesses.size(); - Accesses.push_back(Acc); - } - }; - - /// We store all accesses in bins denoted by their offset and size. - using AccessBinsTy = DenseMap; - - AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } - AccessBinsTy::const_iterator end() const { return AccessBins.end(); } - -protected: - /// The bins with all the accesses for the associated pointer. - AccessBinsTy AccessBins; - - /// Add a new access to the state at offset \p Offset and with size \p Size. + /// Add a new Access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and - /// is of kind \p Kind. + /// is of kind \p Kind. If an Access already exists for the same \p I and same + /// \p RemoteI, the two are combined, potentially losing information about + /// offset and size. The resulting access must now be moved from its original + /// OffsetBin to the bin for its new offset. + /// /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. 
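The addAccess documentation above notes that merging a new access into an existing one can change the set of ranges the access covers, so its index has to leave the offset bins it no longer occupies and enter the new ones. Below is a minimal sketch of that rebinning step using plain standard containers and sorted range lists; the patch itself uses AAPointerInfo::RangeList::set_difference and DenseMap-based bins.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <utility>
#include <vector>

using Range = std::pair<int64_t, int64_t>; // (Offset, Size)
using RangeList = std::vector<Range>;      // kept sorted

int main() {
  std::map<Range, std::set<unsigned>> OffsetBins;
  unsigned AccIndex = 0;

  RangeList Old = {{0, 4}, {8, 4}};  // ranges before the merge
  RangeList New = {{0, 4}, {16, 4}}; // ranges after the merge

  for (const Range &R : Old)
    OffsetBins[R].insert(AccIndex);

  // Ranges only in Old must forget the access; ranges only in New must
  // learn about it.
  RangeList ToRemove, ToAdd;
  std::set_difference(Old.begin(), Old.end(), New.begin(), New.end(),
                      std::back_inserter(ToRemove));
  std::set_difference(New.begin(), New.end(), Old.begin(), Old.end(),
                      std::back_inserter(ToAdd));

  for (const Range &R : ToRemove)
    OffsetBins[R].erase(AccIndex);
  for (const Range &R : ToAdd)
    OffsetBins[R].insert(AccIndex);

  for (const auto &It : OffsetBins)
    std::cout << "[" << It.first.first << "," << It.first.second << "] -> "
              << It.second.size() << " accesses\n";
}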
- ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, + ChangeStatus addAccess(Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I, Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, - Instruction *RemoteI = nullptr, - Accesses *BinPtr = nullptr) { - AA::OffsetAndSize Key{Offset, Size}; - Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key]; - if (!Bin) - Bin = new (A.Allocator) Accesses; - AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); - // Check if we have an access for this instruction in this bin, if not, - // simply add it. - auto It = Bin->find(Acc); - if (It == Bin->find_end()) { - Bin->insert(Acc); - return ChangeStatus::CHANGED; - } - // If the existing access is the same as then new one, nothing changed. - AAPointerInfo::Access &Current = Bin->get(It); - AAPointerInfo::Access Before = Current; - // The new one will be combined with the existing one. - Current &= Acc; - return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + Instruction *RemoteI = nullptr); + + using OffsetBinsTy = DenseMap>; + + using const_bin_iterator = OffsetBinsTy::const_iterator; + const_bin_iterator begin() const { return OffsetBins.begin(); } + const_bin_iterator end() const { return OffsetBins.end(); } + + const AAPointerInfo::Access &getAccess(unsigned Index) const { + return AccessList[Index]; } +protected: + // Every memory instruction results in an Access object. We maintain a list of + // all Access objects that we own, along with the following maps: + // + // - OffsetBins: RangeTy -> { Access } + // - RemoteIMap: RemoteI x LocalI -> Access + // + // A RemoteI is any instruction that accesses memory. RemoteI is different + // from LocalI if and only if LocalI is a call; then RemoteI is some + // instruction in the callgraph starting from LocalI. Multiple paths in the + // callgraph from LocalI to RemoteI may produce multiple accesses, but these + // are all combined into a single Access object. This may result in loss of + // information in RangeTy in the Access object. + SmallVector AccessList; + OffsetBinsTy OffsetBins; + DenseMap> RemoteIMap; + /// See AAPointerInfo::forallInterferingAccesses. bool forallInterferingAccesses( - AA::OffsetAndSize OAS, + AA::RangeTy Range, function_ref CB) const { if (!isValidState()) return false; - for (const auto &It : AccessBins) { - AA::OffsetAndSize ItOAS = It.getFirst(); - if (!OAS.mayOverlap(ItOAS)) + for (const auto &It : OffsetBins) { + AA::RangeTy ItRange = It.getFirst(); + if (!Range.mayOverlap(ItRange)) continue; - bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto &Access : *It.getSecond()) - if (!CB(Access, IsExact)) + bool IsExact = Range == ItRange && !Range.offsetOrSizeAreUnknown(); + for (auto Index : It.getSecond()) { + auto &Access = AccessList[Index]; + if (!CB(Access, IsExact && Access.hasUniqueRange())) return false; + } } return true; } @@ -929,33 +877,23 @@ struct AA::PointerInfo::State : public AbstractState { bool forallInterferingAccesses( Instruction &I, function_ref CB, - AA::OffsetAndSize *OASPtr) const { + AA::RangeTy &Range) const { if (!isValidState()) return false; - // First find the offset and size of I. 
- AA::OffsetAndSize OAS; - for (const auto &It : AccessBins) { - for (auto &Access : *It.getSecond()) { - if (Access.getRemoteInst() == &I) { - OAS = It.getFirst(); + auto LocalList = RemoteIMap.find(&I); + if (LocalList == RemoteIMap.end()) { + return true; + } + + for (unsigned Index : LocalList->getSecond()) { + for (auto &R : AccessList[Index]) { + Range &= R; + if (Range.offsetOrSizeAreUnknown()) break; - } } - if (OAS.Size != AA::OffsetAndSize::Unassigned) - break; } - - if (OASPtr) - *OASPtr = OAS; - - // No access for I was found, we are done. - if (OAS.Size == AA::OffsetAndSize::Unassigned) - return true; - - // Now that we have an offset and size, find all overlapping ones and use - // the callback on the accesses. - return forallInterferingAccesses(OAS, CB); + return forallInterferingAccesses(Range, CB); } private: @@ -963,7 +901,140 @@ struct AA::PointerInfo::State : public AbstractState { BooleanState BS; }; +ChangeStatus AA::PointerInfo::State::addAccess( + Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I, + Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, + Instruction *RemoteI) { + RemoteI = RemoteI ? RemoteI : &I; + + // Check if we have an access for this instruction, if not, simply add it. + auto &LocalList = RemoteIMap[RemoteI]; + bool AccExists = false; + unsigned AccIndex = AccessList.size(); + for (auto Index : LocalList) { + auto &A = AccessList[Index]; + if (A.getLocalInst() == &I) { + AccExists = true; + AccIndex = Index; + break; + } + } + + auto AddToBins = [&](const AAPointerInfo::RangeList &ToAdd) { + LLVM_DEBUG( + if (ToAdd.size()) + dbgs() << "[AAPointerInfo] Inserting access in new offset bins\n"; + ); + + for (auto Key : ToAdd) { + LLVM_DEBUG(dbgs() << " key " << Key << "\n"); + OffsetBins[Key].insert(AccIndex); + } + }; + + if (!AccExists) { + AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty); + assert((AccessList.size() == AccIndex + 1) && + "New Access should have been at AccIndex"); + LocalList.push_back(AccIndex); + AddToBins(AccessList[AccIndex].getRanges()); + return ChangeStatus::CHANGED; + } + + // Combine the new Access with the existing Access, and then update the + // mapping in the offset bins. + AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty); + auto &Current = AccessList[AccIndex]; + auto Before = Current; + Current &= Acc; + if (Current == Before) + return ChangeStatus::UNCHANGED; + + auto &ExistingRanges = Before.getRanges(); + auto &NewRanges = Current.getRanges(); + + // Ranges that are in the old access but not the new access need to be removed + // from the offset bins. + AAPointerInfo::RangeList ToRemove; + AAPointerInfo::RangeList::set_difference(ExistingRanges, NewRanges, ToRemove); + LLVM_DEBUG( + if (ToRemove.size()) + dbgs() << "[AAPointerInfo] Removing access from old offset bins\n"; + ); + + for (auto Key : ToRemove) { + LLVM_DEBUG(dbgs() << " key " << Key << "\n"); + assert(OffsetBins.count(Key) && "Existing Access must be in some bin."); + auto &Bin = OffsetBins[Key]; + assert(Bin.count(AccIndex) && + "Expected bin to actually contain the Access."); + Bin.erase(AccIndex); + } + + // Ranges that are in the new access but not the old access need to be added + // to the offset bins. + AAPointerInfo::RangeList ToAdd; + AAPointerInfo::RangeList::set_difference(NewRanges, ExistingRanges, ToAdd); + AddToBins(ToAdd); + return ChangeStatus::CHANGED; +} + namespace { + +/// A helper containing a list of offsets computed for a Use. 
Ideally this +/// list should be strictly ascending, but we ensure that only when we +/// actually translate the list of offsets to a RangeList. +struct OffsetInfo { + using VecTy = SmallVector; + using const_iterator = VecTy::const_iterator; + VecTy Offsets; + + const_iterator begin() const { return Offsets.begin(); } + const_iterator end() const { return Offsets.end(); } + + bool operator==(const OffsetInfo &RHS) const { + return Offsets == RHS.Offsets; + } + + void insert(int64_t Offset) { Offsets.push_back(Offset); } + bool isUnassigned() const { return Offsets.size() == 0; } + + bool isUnknown() const { + if (isUnassigned()) + return false; + if (Offsets.size() == 1) + return Offsets.front() == AA::RangeTy::Unknown; + return false; + } + + void setUnknown() { + Offsets.clear(); + Offsets.push_back(AA::RangeTy::Unknown); + } + + void addToAll(int64_t Inc) { + for (auto &Offset : Offsets) { + Offset += Inc; + } + } + + /// Copy offsets from \p R into the current list. + /// + /// Ideally all lists should be strictly ascending, but we defer that to the + /// actual use of the list. So we just blindly append here. + void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); } +}; + +static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) { + ListSeparator LS; + OS << "["; + for (auto Offset : OI) { + OS << LS << Offset; + } + OS << "]"; + return OS; +} + struct AAPointerInfoImpl : public StateWrapper { using BaseTy = StateWrapper; @@ -973,7 +1044,7 @@ struct AAPointerInfoImpl const std::string getAsStr() const override { return std::string("PointerInfo ") + (isValidState() ? (std::string("#") + - std::to_string(AccessBins.size()) + " bins") + std::to_string(OffsetBins.size()) + " bins") : ""); } @@ -983,16 +1054,16 @@ struct AAPointerInfoImpl } bool forallInterferingAccesses( - AA::OffsetAndSize OAS, + AA::RangeTy Range, function_ref CB) const override { - return State::forallInterferingAccesses(OAS, CB); + return State::forallInterferingAccesses(Range, CB); } bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB, bool &HasBeenWrittenTo, - AA::OffsetAndSize *OASPtr = nullptr) const override { + AA::RangeTy &Range) const override { HasBeenWrittenTo = false; SmallPtrSet DominatingWrites; @@ -1088,7 +1159,7 @@ struct AAPointerInfoImpl } auto AccessCB = [&](const Access &Acc, bool Exact) { - if ((!FindInterferingWrites || !Acc.isWrite()) && + if ((!FindInterferingWrites || !Acc.isWriteOrAssumption()) && (!FindInterferingReads || !Acc.isRead())) return true; @@ -1107,7 +1178,7 @@ struct AAPointerInfoImpl InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(I, AccessCB, OASPtr)) + if (!State::forallInterferingAccesses(I, AccessCB, Range)) return false; if (HasBeenWrittenTo) { @@ -1121,7 +1192,7 @@ struct AAPointerInfoImpl // the worst case quadratic as we are looking for another write that will // hide the effect of this one. 
auto CanSkipAccess = [&](const Access &Acc, bool Exact) { - if ((!Acc.isWrite() || + if ((!Acc.isWriteOrAssumption() || !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, IsLiveInCalleeCB)) && (!Acc.isRead() || @@ -1161,40 +1232,63 @@ struct AAPointerInfoImpl return true; } - ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA, - int64_t Offset, CallBase &CB, - bool FromCallee = false) { + ChangeStatus translateAndAddStateFromCallee(Attributor &A, + const AAPointerInfo &OtherAA, + CallBase &CB) { using namespace AA::PointerInfo; if (!OtherAA.getState().isValidState() || !isValidState()) return indicatePessimisticFixpoint(); const auto &OtherAAImpl = static_cast(OtherAA); - bool IsByval = - FromCallee && OtherAAImpl.getAssociatedArgument()->hasByValAttr(); + bool IsByval = OtherAAImpl.getAssociatedArgument()->hasByValAttr(); // Combine the accesses bin by bin. ChangeStatus Changed = ChangeStatus::UNCHANGED; - for (const auto &It : OtherAAImpl.getState()) { - AA::OffsetAndSize OAS = AA::OffsetAndSize::getUnknown(); - if (Offset != AA::OffsetAndSize::Unknown) - OAS = AA::OffsetAndSize(It.first.Offset + Offset, It.first.Size); - Accesses *Bin = AccessBins.lookup(OAS); - for (const AAPointerInfo::Access &RAcc : *It.second) { + const auto &State = OtherAAImpl.getState(); + for (const auto &It : State) { + for (auto Index : It.getSecond()) { + const auto &RAcc = State.getAccess(Index); if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; AccessKind AK = RAcc.getKind(); - Optional Content = RAcc.getContent(); - if (FromCallee) { - Content = A.translateArgumentToCallSiteContent( - RAcc.getContent(), CB, *this, UsedAssumedInformation); - AK = - AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); - AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); + auto Content = A.translateArgumentToCallSiteContent( + RAcc.getContent(), CB, *this, UsedAssumedInformation); + AK = AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); + AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); + + Changed |= addAccess(A, RAcc.getRanges(), CB, Content, AK, + RAcc.getType(), RAcc.getRemoteInst()); + } + } + return Changed; + } + + ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA, + const OffsetInfo &Offsets, CallBase &CB) { + using namespace AA::PointerInfo; + if (!OtherAA.getState().isValidState() || !isValidState()) + return indicatePessimisticFixpoint(); + + const auto &OtherAAImpl = static_cast(OtherAA); + + // Combine the accesses bin by bin. + ChangeStatus Changed = ChangeStatus::UNCHANGED; + const auto &State = OtherAAImpl.getState(); + for (const auto &It : State) { + for (auto Index : It.getSecond()) { + const auto &RAcc = State.getAccess(Index); + for (auto Offset : Offsets) { + auto NewRanges = Offset == AA::RangeTy::Unknown + ? AA::RangeTy::getUnknown() + : RAcc.getRanges(); + if (!NewRanges.isUnknown()) { + NewRanges.addToAllOffsets(Offset); + } + Changed |= + addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(), + RAcc.getType(), RAcc.getRemoteInst()); } - Changed = - Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst(), Bin); } } return Changed; @@ -1206,10 +1300,11 @@ struct AAPointerInfoImpl /// Dump the state into \p O. 
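translateAndAddState above re-anchors every callee-side access at each constant offset the call-site pointer may have: a known offset shifts all ranges of the access (addToAllOffsets), while an unknown offset collapses the result to the unknown range. A small worked sketch of that translation with simplified stand-in types, where the sentinel value is assumed for illustration:

#include <cstdint>
#include <iostream>
#include <vector>

constexpr int64_t Unknown = -1; // sentinel, assumed for illustration

struct Range { int64_t Offset, Size; };

std::vector<Range> translate(const std::vector<Range> &CalleeRanges,
                             int64_t CallSiteOffset) {
  if (CallSiteOffset == Unknown)
    return {{Unknown, Unknown}}; // collapse to the unknown range
  std::vector<Range> Out = CalleeRanges;
  for (Range &R : Out)
    R.Offset += CallSiteOffset; // shift every range, like addToAllOffsets
  return Out;
}

int main() {
  // Callee writes bytes [4,8) of its pointer argument; the call site passes
  // a pointer 16 bytes into the underlying object, so the translated access
  // covers bytes [20,24) of that object.
  for (const Range &R : translate({{4, 4}}, 16))
    std::cout << "[" << R.Offset << "," << R.Offset + R.Size << ")\n";
}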
void dumpState(raw_ostream &O) { - for (auto &It : AccessBins) { + for (auto &It : OffsetBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size - << "] : " << It.getSecond()->size() << "\n"; - for (auto &Acc : *It.getSecond()) { + << "] : " << It.getSecond().size() << "\n"; + for (auto AccIndex : It.getSecond()) { + auto &Acc = AccessList[AccIndex]; O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) O << " --> " << *Acc.getRemoteInst() @@ -1231,268 +1326,403 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { : AAPointerInfoImpl(IRP, A) {} /// Deal with an access and signal if it was handled successfully. - bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, - Optional Content, AccessKind Kind, int64_t Offset, - ChangeStatus &Changed, Type *Ty, - int64_t Size = AA::OffsetAndSize::Unknown) { + bool handleAccess(Attributor &A, Instruction &I, + Optional Content, AccessKind Kind, + SmallVectorImpl &Offsets, ChangeStatus &Changed, + Type &Ty) { using namespace AA::PointerInfo; - // No need to find a size if one is given. - if (Size == AA::OffsetAndSize::Unknown && Ty) { - const DataLayout &DL = A.getDataLayout(); - TypeSize AccessSize = DL.getTypeStoreSize(Ty); - if (!AccessSize.isScalable()) - Size = AccessSize.getFixedSize(); - } - Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty); + auto Size = AA::RangeTy::Unknown; + const DataLayout &DL = A.getDataLayout(); + TypeSize AccessSize = DL.getTypeStoreSize(&Ty); + if (!AccessSize.isScalable()) + Size = AccessSize.getFixedSize(); + + // Make a strictly ascending list of offsets as required by addAccess() + llvm::sort(Offsets); + auto Last = std::unique(Offsets.begin(), Offsets.end()); + Offsets.erase(Last, Offsets.end()); + + Changed = Changed | addAccess(A, {Offsets, Size}, I, Content, Kind, &Ty); return true; }; - /// Helper struct, will support ranges eventually. - struct OffsetInfo { - int64_t Offset = AA::OffsetAndSize::Unassigned; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + void collectConstantsForGEP(Attributor &A, const DataLayout &DL, + OffsetInfo &UsrOI, const OffsetInfo &PtrOI, + const GEPOperator *GEP); + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition()); + } +}; + +/// If the indices to \p GEP can be traced to constants, incorporate all +/// of these into \p UsrOI. +void AAPointerInfoFloating::collectConstantsForGEP(Attributor &A, + const DataLayout &DL, + OffsetInfo &UsrOI, + const OffsetInfo &PtrOI, + const GEPOperator *GEP) { + unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); + MapVector VariableOffsets; + APInt ConstantOffset(BitWidth, 0); + + assert(!UsrOI.isUnknown() && !PtrOI.isUnknown() && + "Don't look for constant values if the offset has already been " + "determined to be unknown."); + + if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) { + UsrOI.setUnknown(); + return; + } - bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; } + LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is " + << (VariableOffsets.empty() ? "" : "not") << " constant " + << *GEP << "\n"); + + auto Union = PtrOI; + Union.addToAll(ConstantOffset.getSExtValue()); + + // Each VI in VariableOffsets has a set of potential constant values. 
Every + // combination of elements, picked one each from these sets, is separately + // added to the original set of offsets, thus resulting in more offsets. + for (const auto &VI : VariableOffsets) { + auto &PotentialConstantsAA = A.getAAFor( + *this, IRPosition::value(*VI.first), DepClassTy::OPTIONAL); + if (!PotentialConstantsAA.isValidState()) { + UsrOI.setUnknown(); + return; + } + + auto &AssumedSet = PotentialConstantsAA.getAssumedSet(); + + // Nothing to pick if AssumedSet is empty, i.e., not yet discovered. + if (AssumedSet.empty()) + continue; + + OffsetInfo Product; + for (const auto &ConstOffset : AssumedSet) { + auto CopyPerOffset = Union; + CopyPerOffset.addToAll(ConstOffset.getSExtValue() * + VI.second.getZExtValue()); + Product.merge(CopyPerOffset); + } + Union = Product; + } + + UsrOI = std::move(Union); + return; +} + +ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { + using namespace AA::PointerInfo; + ChangeStatus Changed = ChangeStatus::UNCHANGED; + const DataLayout &DL = A.getDataLayout(); + Value &AssociatedValue = getAssociatedValue(); + + DenseMap OffsetInfoMap; + OffsetInfoMap[&AssociatedValue].insert(0); + + auto HandlePassthroughUser = [&](Value *Usr, const OffsetInfo &PtrOI, + bool &Follow) { + assert(!PtrOI.isUnassigned() && + "Cannot pass through if the input Ptr was not visited!"); + OffsetInfoMap[Usr] = PtrOI; + Follow = true; + return true; }; - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - using namespace AA::PointerInfo; - ChangeStatus Changed = ChangeStatus::UNCHANGED; - Value &AssociatedValue = getAssociatedValue(); + const auto *TLI = + getAnchorScope() + ? A.getInfoCache().getTargetLibraryInfoForFunction(*getAnchorScope()) + : nullptr; + auto UsePred = [&](const Use &U, bool &Follow) -> bool { + Value *CurPtr = U.get(); + User *Usr = U.getUser(); + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Analyze " << *CurPtr << " in " << *Usr + << "\n"); + assert(OffsetInfoMap.count(CurPtr) && + "The current pointer offset should have been seeded!"); + + if (ConstantExpr *CE = dyn_cast(Usr)) { + if (CE->isCast()) + return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); + if (CE->isCompare()) + return true; + if (!isa(CE)) { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled constant user " << *CE + << "\n"); + return false; + } + } + if (auto *GEP = dyn_cast(Usr)) { + // Note the order here, the Usr access might change the map, CurPtr is + // already in it though. + auto &UsrOI = OffsetInfoMap[Usr]; + auto &PtrOI = OffsetInfoMap[CurPtr]; + + if (UsrOI.isUnknown()) + return true; - const DataLayout &DL = A.getDataLayout(); - DenseMap OffsetInfoMap; - OffsetInfoMap[&AssociatedValue] = OffsetInfo{0}; - - auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI, - bool &Follow) { - assert(PtrOI.Offset != AA::OffsetAndSize::Unassigned && - "Cannot pass through if the input Ptr was not visited!"); - OffsetInfo &UsrOI = OffsetInfoMap[Usr]; - UsrOI = PtrOI; Follow = true; - return true; - }; - const auto *TLI = getAnchorScope() - ? 
A.getInfoCache().getTargetLibraryInfoForFunction( - *getAnchorScope()) - : nullptr; - auto UsePred = [&](const Use &U, bool &Follow) -> bool { - Value *CurPtr = U.get(); - User *Usr = U.getUser(); - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Analyze " << *CurPtr << " in " - << *Usr << "\n"); - assert(OffsetInfoMap.count(CurPtr) && - "The current pointer offset should have been seeded!"); - - if (ConstantExpr *CE = dyn_cast(Usr)) { - if (CE->isCast()) - return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); - if (CE->isCompare()) - return true; - if (!isa(CE)) { - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled constant user " << *CE - << "\n"); - return false; - } + if (PtrOI.isUnknown()) { + UsrOI.setUnknown(); + return true; } - if (auto *GEP = dyn_cast(Usr)) { - // Note the order here, the Usr access might change the map, CurPtr is - // already in it though. - OffsetInfo &UsrOI = OffsetInfoMap[Usr]; - OffsetInfo &PtrOI = OffsetInfoMap[CurPtr]; - UsrOI = PtrOI; - - // TODO: Use range information. - APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); - if (PtrOI.Offset == AA::OffsetAndSize::Unknown || - !GEP->accumulateConstantOffset(DL, GEPOffset)) { - LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant " - << *GEP << "\n"); - UsrOI.Offset = AA::OffsetAndSize::Unknown; - Follow = true; - return true; - } - LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP - << "\n"); - UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue(); - Follow = true; + collectConstantsForGEP(A, DL, UsrOI, PtrOI, GEP); + return true; + } + if (isa(Usr)) + return false; + if (isa(Usr) || isa(Usr) || isa(Usr)) + return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); + + // For PHIs we need to take care of the recurrence explicitly as the value + // might change while we iterate through a loop. For now, we give up if + // the PHI is not invariant. + if (isa(Usr)) { + // Note the order here, the Usr access might change the map, CurPtr is + // already in it though. + bool IsFirstPHIUser = !OffsetInfoMap.count(Usr); + auto &UsrOI = OffsetInfoMap[Usr]; + auto &PtrOI = OffsetInfoMap[CurPtr]; + + // Check if the PHI operand has already an unknown offset as we can't + // improve on that anymore. + if (PtrOI.isUnknown()) { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown " + << *CurPtr << " in " << *Usr << "\n"); + Follow = !UsrOI.isUnknown(); + UsrOI.setUnknown(); return true; } - if (isa(Usr)) - return false; - if (isa(Usr) || isa(Usr) || isa(Usr)) - return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); - // For PHIs we need to take care of the recurrence explicitly as the value - // might change while we iterate through a loop. For now, we give up if - // the PHI is not invariant. - if (isa(Usr)) { - // Note the order here, the Usr access might change the map, CurPtr is - // already in it though. - bool IsFirstPHIUser = !OffsetInfoMap.count(Usr); - OffsetInfo &UsrOI = OffsetInfoMap[Usr]; - OffsetInfo &PtrOI = OffsetInfoMap[CurPtr]; - - // Check if the PHI operand has already an unknown offset as we can't - // improve on that anymore. - if (PtrOI.Offset == AA::OffsetAndSize::Unknown) { - LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown " - << *CurPtr << " in " << *Usr << "\n"); - Follow = UsrOI.Offset != AA::OffsetAndSize::Unknown; - UsrOI = PtrOI; - return true; - } + // Check if the PHI is invariant (so far). 
+ if (UsrOI == PtrOI) { + assert(!PtrOI.isUnassigned() && + "Cannot assign if the current Ptr was not visited!"); + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)"); + return true; + } - // Check if the PHI is invariant (so far). - if (UsrOI == PtrOI) { - assert(PtrOI.Offset != AA::OffsetAndSize::Unassigned && - "Cannot assign if the current Ptr was not visited!"); - LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)"); - return true; + // Check if the PHI operand is not dependent on the PHI itself. + APInt Offset( + DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()), + 0); + Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true); + auto It = OffsetInfoMap.find(CurPtrBase); + if (It != OffsetInfoMap.end()) { + auto BaseOI = It->getSecond(); + BaseOI.addToAll(Offset.getZExtValue()); + if (IsFirstPHIUser || BaseOI == UsrOI) { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant " << *CurPtr + << " in " << *Usr << "\n"); + return HandlePassthroughUser(Usr, PtrOI, Follow); } + LLVM_DEBUG( + dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + } else { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); + } - // Check if the PHI operand is not dependent on the PHI itself. - APInt Offset( - DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()), - 0); - Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets( - DL, Offset, /* AllowNonInbounds */ true); - auto It = OffsetInfoMap.find(CurPtrBase); - if (It != OffsetInfoMap.end()) { - Offset += It->getSecond().Offset; - if (IsFirstPHIUser || Offset == UsrOI.Offset) - return HandlePassthroughUser(Usr, PtrOI, Follow); - LLVM_DEBUG(dbgs() - << "[AAPointerInfo] PHI operand pointer offset mismatch " - << *CurPtr << " in " << *Usr << "\n"); - } else { - LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " - << *CurPtr << " in " << *Usr << "\n"); - } + // TODO: Approximate in case we know the direction of the recurrence. + UsrOI.setUnknown(); + Follow = true; + return true; + } - // TODO: Approximate in case we know the direction of the recurrence. - UsrOI = PtrOI; - UsrOI.Offset = AA::OffsetAndSize::Unknown; - Follow = true; - return true; - } + if (auto *LoadI = dyn_cast(Usr)) { + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be read. + AccessKind AK = AccessKind::AK_R; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); + if (!handleAccess(A, *LoadI, /* Content */ nullptr, AK, + OffsetInfoMap[CurPtr].Offsets, Changed, + *LoadI->getType())) + return false; - if (auto *LoadI = dyn_cast(Usr)) { - // If the access is to a pointer that may or may not be the associated - // value, e.g. due to a PHI, we cannot assume it will be read. 
- AccessKind AK = AccessKind::AK_R; - if (getUnderlyingObject(CurPtr) == &AssociatedValue) - AK = AccessKind(AK | AccessKind::AK_MUST); - else - AK = AccessKind(AK | AccessKind::AK_MAY); - return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AK, - OffsetInfoMap[CurPtr].Offset, Changed, - LoadI->getType()); - } + auto IsAssumption = [](Instruction &I) { + if (auto *II = dyn_cast(&I)) + return II->isAssumeLikeIntrinsic(); + return false; + }; - auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy, - ArrayRef OtherOps, AccessKind AK) { - for (auto *OtherOp : OtherOps) { - if (OtherOp == CurPtr) { - LLVM_DEBUG( - dbgs() - << "[AAPointerInfo] Escaping use in store like instruction " - << I << "\n"); + auto IsImpactedInRange = [&](Instruction *FromI, Instruction *ToI) { + // Check if the assumption and the load are executed together without + // memory modification. + do { + if (FromI->mayWriteToMemory() && !IsAssumption(*FromI)) + return true; + FromI = FromI->getNextNonDebugInstruction(); + } while (FromI && FromI != ToI); + return false; + }; + + BasicBlock *BB = LoadI->getParent(); + auto IsValidAssume = [&](IntrinsicInst &IntrI) { + if (IntrI.getIntrinsicID() != Intrinsic::assume) + return false; + BasicBlock *IntrBB = IntrI.getParent(); + if (IntrI.getParent() == BB) { + if (IsImpactedInRange(LoadI->getNextNonDebugInstruction(), &IntrI)) + return false; + } else { + auto PredIt = pred_begin(IntrBB); + if ((*PredIt) != BB) + return false; + if (++PredIt != pred_end(IntrBB)) + return false; + for (auto *SuccBB : successors(BB)) { + if (SuccBB == IntrBB) + continue; + if (isa(SuccBB->getTerminator())) + continue; return false; } + if (IsImpactedInRange(LoadI->getNextNonDebugInstruction(), + BB->getTerminator())) + return false; + if (IsImpactedInRange(&IntrBB->front(), &IntrI)) + return false; } - - // If the access is to a pointer that may or may not be the associated - // value, e.g. due to a PHI, we cannot assume it will be written. 
- if (getUnderlyingObject(CurPtr) == &AssociatedValue) - AK = AccessKind(AK | AccessKind::AK_MUST); - else - AK = AccessKind(AK | AccessKind::AK_MAY); - bool UsedAssumedInformation = false; - Optional Content = nullptr; - if (ValueOp) - Content = A.getAssumedSimplified( - *ValueOp, *this, UsedAssumedInformation, AA::Interprocedural); - return handleAccess(A, I, *CurPtr, Content, AK, - OffsetInfoMap[CurPtr].Offset, Changed, &ValueTy); + return true; }; - if (auto *StoreI = dyn_cast(Usr)) - return HandleStoreLike(*StoreI, StoreI->getValueOperand(), - *StoreI->getValueOperand()->getType(), - {StoreI->getValueOperand()}, AccessKind::AK_W); - if (auto *RMWI = dyn_cast(Usr)) - return HandleStoreLike(*RMWI, nullptr, - *RMWI->getValOperand()->getType(), - {RMWI->getValOperand()}, AccessKind::AK_RW); - if (auto *CXI = dyn_cast(Usr)) - return HandleStoreLike( - *CXI, nullptr, *CXI->getNewValOperand()->getType(), - {CXI->getCompareOperand(), CXI->getNewValOperand()}, - AccessKind::AK_RW); - - if (auto *CB = dyn_cast(Usr)) { - if (CB->isLifetimeStartOrEnd()) - return true; - if (getFreedOperand(CB, TLI) == U) - return true; - if (CB->isArgOperand(&U)) { - unsigned ArgNo = CB->getArgOperandNo(&U); - const auto &CSArgPI = A.getAAFor( - *this, IRPosition::callsite_argument(*CB, ArgNo), - DepClassTy::REQUIRED); - Changed = translateAndAddState(A, CSArgPI, - OffsetInfoMap[CurPtr].Offset, *CB) | - Changed; - return isValidState(); + std::pair Assumption; + for (const Use &LoadU : LoadI->uses()) { + if (auto *CmpI = dyn_cast(LoadU.getUser())) { + if (!CmpI->isEquality() || !CmpI->isTrueWhenEqual()) + continue; + for (const Use &CmpU : CmpI->uses()) { + if (auto *IntrI = dyn_cast(CmpU.getUser())) { + if (!IsValidAssume(*IntrI)) + continue; + int Idx = CmpI->getOperandUse(0) == LoadU; + Assumption = {CmpI->getOperand(Idx), IntrI}; + break; + } + } } - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB - << "\n"); - // TODO: Allow some call uses - return false; + if (Assumption.first) + break; } - LLVM_DEBUG(dbgs() << "[AAPointerInfo] User not handled " << *Usr << "\n"); - return false; - }; - auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { - if (OffsetInfoMap.count(NewU)) { - LLVM_DEBUG({ - if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) { - dbgs() << "[AAPointerInfo] Equivalent use callback failed: " - << OffsetInfoMap[NewU].Offset << " vs " - << OffsetInfoMap[OldU].Offset << "\n"; - } - }); - return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + // Check if we found an assumption associated with this load. + if (!Assumption.first || !Assumption.second) + return true; + + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Assumption found " + << *Assumption.second << ": " << *LoadI + << " == " << *Assumption.first << "\n"); + + return handleAccess( + A, *Assumption.second, Assumption.first, AccessKind::AK_ASSUMPTION, + OffsetInfoMap[CurPtr].Offsets, Changed, *LoadI->getType()); + } + + auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy, + ArrayRef OtherOps, AccessKind AK) { + for (auto *OtherOp : OtherOps) { + if (OtherOp == CurPtr) { + LLVM_DEBUG( + dbgs() + << "[AAPointerInfo] Escaping use in store like instruction " << I + << "\n"); + return false; + } } - OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; - return true; + + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be written. 
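The assumption handling added above recognises a load whose result is asserted, via an equality compare feeding llvm.assume, to be a particular value; when the assume is reached from the load without intervening memory writes, the load site is recorded as an AK_ASSUMPTION access carrying the asserted value as its content. At the source level (with clang) the matched shape is roughly the following; the function name and the constant are made up for illustration.

  // Illustrative only: __builtin_assume(v == 42) typically lowers to
  //   %v   = load i32, ptr %p
  //   %cmp = icmp eq i32 %v, 42
  //   call void @llvm.assume(i1 %cmp)
  // which is the pattern the new code looks for among the load's users.
  void consumer(int *p) {
    int v = *p;
    __builtin_assume(v == 42);
    // ... uses of v ...
  }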
+ if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); + bool UsedAssumedInformation = false; + Optional Content = nullptr; + if (ValueOp) + Content = A.getAssumedSimplified( + *ValueOp, *this, UsedAssumedInformation, AA::Interprocedural); + return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offsets, + Changed, ValueTy); }; - if (!A.checkForAllUses(UsePred, *this, AssociatedValue, - /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - /* IgnoreDroppableUses */ true, EquivalentUseCB)) { - LLVM_DEBUG( - dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n"); - return indicatePessimisticFixpoint(); - } - LLVM_DEBUG({ - dbgs() << "Accesses by bin after update:\n"; - dumpState(dbgs()); - }); + if (auto *StoreI = dyn_cast(Usr)) + return HandleStoreLike(*StoreI, StoreI->getValueOperand(), + *StoreI->getValueOperand()->getType(), + {StoreI->getValueOperand()}, AccessKind::AK_W); + if (auto *RMWI = dyn_cast(Usr)) + return HandleStoreLike(*RMWI, nullptr, *RMWI->getValOperand()->getType(), + {RMWI->getValOperand()}, AccessKind::AK_RW); + if (auto *CXI = dyn_cast(Usr)) + return HandleStoreLike( + *CXI, nullptr, *CXI->getNewValOperand()->getType(), + {CXI->getCompareOperand(), CXI->getNewValOperand()}, + AccessKind::AK_RW); + + if (auto *CB = dyn_cast(Usr)) { + if (CB->isLifetimeStartOrEnd()) + return true; + if (getFreedOperand(CB, TLI) == U) + return true; + if (CB->isArgOperand(&U)) { + unsigned ArgNo = CB->getArgOperandNo(&U); + const auto &CSArgPI = A.getAAFor( + *this, IRPosition::callsite_argument(*CB, ArgNo), + DepClassTy::REQUIRED); + Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr], *CB) | + Changed; + return isValidState(); + } + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB + << "\n"); + // TODO: Allow some call uses + return false; + } - return Changed; + LLVM_DEBUG(dbgs() << "[AAPointerInfo] User not handled " << *Usr << "\n"); + return false; + }; + auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { + assert(OffsetInfoMap.count(OldU) && "Old use should be known already!"); + if (OffsetInfoMap.count(NewU)) { + LLVM_DEBUG({ + if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) { + dbgs() << "[AAPointerInfo] Equivalent use callback failed: " + << OffsetInfoMap[NewU] << " vs " << OffsetInfoMap[OldU] + << "\n"; + } + }); + return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + } + OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; + return true; + }; + if (!A.checkForAllUses(UsePred, *this, AssociatedValue, + /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, + /* IgnoreDroppableUses */ true, EquivalentUseCB)) { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n"); + return indicatePessimisticFixpoint(); } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition()); - } -}; + LLVM_DEBUG({ + dbgs() << "Accesses by bin after update:\n"; + dumpState(dbgs()); + }); + + return Changed; +} struct AAPointerInfoReturned final : AAPointerInfoImpl { AAPointerInfoReturned(const IRPosition &IRP, Attributor &A) @@ -1538,24 +1768,21 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { // accessed. 
if (auto *MI = dyn_cast_or_null(getCtxI())) { ConstantInt *Length = dyn_cast(MI->getLength()); - int64_t LengthVal = AA::OffsetAndSize::Unknown; + int64_t LengthVal = AA::RangeTy::Unknown; if (Length) LengthVal = Length->getSExtValue(); - Value &Ptr = getAssociatedValue(); unsigned ArgNo = getIRPosition().getCallSiteArgNo(); ChangeStatus Changed = ChangeStatus::UNCHANGED; - if (ArgNo == 0) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_WRITE, 0, - Changed, nullptr, LengthVal); - } else if (ArgNo == 1) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_READ, 0, Changed, - nullptr, LengthVal); - } else { + if (ArgNo > 1) { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); return indicatePessimisticFixpoint(); + } else { + auto Kind = + ArgNo == 0 ? AccessKind::AK_MUST_WRITE : AccessKind::AK_MUST_READ; + Changed = + Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind, nullptr); } - LLVM_DEBUG({ dbgs() << "Accesses by bin after update:\n"; dumpState(dbgs()); @@ -1574,8 +1801,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { auto &ArgAA = A.getAAFor(*this, ArgPos, DepClassTy::REQUIRED); if (ArgAA.getState().isValidState()) - return translateAndAddState(A, ArgAA, 0, *cast(getCtxI()), - /* FromCallee */ true); + return translateAndAddStateFromCallee(A, ArgAA, + *cast(getCtxI())); if (!Arg->getParent()->isDeclaration()) return indicatePessimisticFixpoint(); } @@ -1590,14 +1817,10 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { if (AA::isAssumedReadNone(A, getIRPosition(), *this, IsKnown)) return ChangeStatus::UNCHANGED; bool ReadOnly = AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown); - - ChangeStatus Changed = ChangeStatus::UNCHANGED; - handleAccess(A, *getCtxI(), getAssociatedValue(), nullptr, - ReadOnly ? AccessKind::AK_MAY_READ - : AccessKind::AK_MAY_READ_WRITE, - AA::OffsetAndSize::Unknown, Changed, nullptr, - AA::OffsetAndSize::Unknown); - return Changed; + auto Kind = + ReadOnly ? AccessKind::AK_MAY_READ : AccessKind::AK_MAY_READ_WRITE; + return addAccess(A, AA::RangeTy::getUnknown(), *getCtxI(), nullptr, Kind, + nullptr); } /// See AbstractAttribute::trackStatistics() @@ -6443,6 +6666,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { (!IsGlobalizedLocal && IsInLoop(*AI.CB->getParent())))) AI.MoveAllocaIntoEntry = false; + // If the alloca comes from a converted __kmpc_alloc_shared then we move + // it to the entry block. + if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) + AI.MoveAllocaIntoEntry = true; } return Changed; @@ -10182,10 +10409,20 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); else Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); - } else { + } else if (&SI == &getAssociatedValue()) { // We could not simplify the condition, assume both values. 
Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + } else { + Optional SimpleV = A.getAssumedSimplified( + IRPosition::inst(SI), *this, UsedAssumedInformation, II.S); + if (!SimpleV.has_value()) + return true; + if (*SimpleV) { + addValue(A, getState(), **SimpleV, CtxI, II.S, getAnchorScope()); + return true; + } + return false; } return true; } @@ -10270,16 +10507,28 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { return LI; }; - LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction()); - for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) { - BasicBlock *IncomingBB = PHI.getIncomingBlock(u); - if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) { - LI.AnyDead = true; - continue; + if (&PHI == &getAssociatedValue()) { + LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction()); + for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) { + BasicBlock *IncomingBB = PHI.getIncomingBlock(u); + if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) { + LI.AnyDead = true; + continue; + } + Worklist.push_back( + {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S}); } - Worklist.push_back( - {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S}); + return true; } + + bool UsedAssumedInformation = false; + Optional SimpleV = A.getAssumedSimplified( + IRPosition::inst(PHI), *this, UsedAssumedInformation, II.S); + if (!SimpleV.has_value()) + return true; + if (!(*SimpleV)) + return false; + addValue(A, getState(), **SimpleV, &PHI, II.S, getAnchorScope()); return true; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index a1f29cb69efc0..f66d5c6a8be77 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -28,10 +28,15 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); -/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +static cl::opt MaxCopiedFromConstantUsers( + "instcombine-max-copied-from-constant-users", cl::init(300), + cl::desc("Maximum users to visit in copy from constant transform"), + cl::Hidden); + +/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) /// pointer to an alloca. Ignore any reads of the pointer, return false if we /// see any stores or other unknown uses. If we see pointer arithmetic, keep /// track of whether it moves the pointer (with IsOffset) but otherwise traverse @@ -46,12 +51,19 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, // ahead and replace the value with the global, this lets the caller quickly // eliminate the markers. 
- SmallVector, 35> ValuesToInspect; - ValuesToInspect.emplace_back(V, false); - while (!ValuesToInspect.empty()) { - auto ValuePair = ValuesToInspect.pop_back_val(); - const bool IsOffset = ValuePair.second; - for (auto &U : ValuePair.first->uses()) { + using ValueAndIsOffset = PointerIntPair; + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.emplace_back(V, false); + while (!Worklist.empty()) { + ValueAndIsOffset Elem = Worklist.pop_back_val(); + if (!Visited.insert(Elem).second) + continue; + if (Visited.size() > MaxCopiedFromConstantUsers) + return false; + + const auto [Value, IsOffset] = Elem; + for (auto &U : Value->uses()) { auto *I = cast(U.getUser()); if (auto *LI = dyn_cast(I)) { @@ -60,15 +72,22 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, continue; } + if (isa(I)) { + // We set IsOffset=true, to forbid the memcpy from occurring after the + // phi: If one of the phi operands is not based on the alloca, we + // would incorrectly omit a write. + Worklist.emplace_back(I, true); + continue; + } if (isa(I) || isa(I)) { // If uses of the bitcast are ok, we are ok. - ValuesToInspect.emplace_back(I, IsOffset); + Worklist.emplace_back(I, IsOffset); continue; } if (auto *GEP = dyn_cast(I)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. - ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices()); + Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices()); continue; } @@ -240,9 +259,11 @@ class PointerReplacer { void replacePointer(Instruction &I, Value *V); private: + bool collectUsersRecursive(Instruction &I); void replace(Instruction *I); Value *getReplacement(Value *I); + SmallPtrSet ValuesToRevisit; SmallSetVector Worklist; MapVector WorkMap; InstCombinerImpl &IC; @@ -250,15 +271,47 @@ class PointerReplacer { } // end anonymous namespace bool PointerReplacer::collectUsers(Instruction &I) { + if (!collectUsersRecursive(I)) + return false; + + // Ensure that all outstanding (indirect) users of I + // are inserted into the Worklist. Return false + // otherwise. + for (auto *Inst : ValuesToRevisit) + if (!Worklist.contains(Inst)) + return false; + return true; +} + +bool PointerReplacer::collectUsersRecursive(Instruction &I) { for (auto *U : I.users()) { auto *Inst = cast(&*U); if (auto *Load = dyn_cast(Inst)) { if (Load->isVolatile()) return false; Worklist.insert(Load); - } else if (isa(Inst) || isa(Inst)) { + } else if (auto *PHI = dyn_cast(Inst)) { + // All incoming values must be instructions for replacability + if (any_of(PHI->incoming_values(), + [](Value *V) { return !isa(V); })) + return false; + + // If at least one incoming value of the PHI is not in Worklist, + // store the PHI for revisiting and skip this iteration of the + // loop. 
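The isOnlyCopiedFromConstantMemory rewrite above replaces the old inspect-list walk with an explicit worklist plus a Visited set and the new -instcombine-max-copied-from-constant-users cutoff, and it now follows PHI users (marking them as offsetting, for the reason given in the comment above). Below is a stripped-down sketch of that bounded traversal pattern; the Node type and function name are placeholders, and plain standard containers stand in for the SmallVector/SmallPtrSet/PointerIntPair the real code uses.

  #include <cstddef>
  #include <set>
  #include <utility>
  #include <vector>

  struct Node {
    std::vector<Node *> Users;
  };

  // Walk all transitive users of Root, tracking an is-offset bit per state.
  // Returns false ("give up") once more than MaxUsers distinct states have
  // been visited, mirroring the MaxCopiedFromConstantUsers cutoff.
  bool walkUsers(Node *Root, std::size_t MaxUsers) {
    using State = std::pair<Node *, bool>; // (value, IsOffset)
    std::vector<State> Worklist{{Root, false}};
    std::set<State> Visited;
    while (!Worklist.empty()) {
      State S = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(S).second)
        continue;                 // already seen with this offset bit
      if (Visited.size() > MaxUsers)
        return false;             // use graph too large, bail out
      for (Node *U : S.first->Users)
        Worklist.push_back({U, S.second});
    }
    return true;
  }

The Visited set is what makes it safe to follow PHIs, whose use graphs can contain cycles, and the cutoff bounds compile time on pathological inputs.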
+ if (any_of(PHI->incoming_values(), [this](Value *V) { + return !Worklist.contains(cast(V)); + })) { + ValuesToRevisit.insert(Inst); + continue; + } + + Worklist.insert(PHI); + if (!collectUsersRecursive(*PHI)) + return false; + } else if (isa(Inst)) { Worklist.insert(Inst); - if (!collectUsers(*Inst)) + if (!collectUsersRecursive(*Inst)) return false; } else if (auto *MI = dyn_cast(Inst)) { if (MI->isVolatile()) @@ -293,6 +346,14 @@ void PointerReplacer::replace(Instruction *I) { IC.InsertNewInstWith(NewI, *LT); IC.replaceInstUsesWith(*LT, NewI); WorkMap[LT] = NewI; + } else if (auto *PHI = dyn_cast(I)) { + Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType(); + auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(), + PHI->getName(), PHI); + for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) + NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)), + PHI->getIncomingBlock(I)); + WorkMap[PHI] = NewPHI; } else if (auto *GEP = dyn_cast(I)) { auto *V = getReplacement(GEP->getPointerOperand()); assert(V && "Operand not replaced"); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 091bb96b6365c..125462da24aa7 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1944,14 +1944,6 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; - // LICM moves a GEP with constant indices to the front, while canonicalization - // swaps it to the back of a non-constant GEP. If both transformations can be - // applied, LICM takes priority because it generally provides greater - // optimization by reducing instruction count in the loop body, but performing - // canonicalization swapping first negates the LICM opportunity while it does - // not necessarily reduce instruction count. - bool ShouldCanonicalizeSwap = true; - if (Src->getResultElementType() == GEP.getSourceElementType() && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && Src->hasOneUse()) { @@ -1961,12 +1953,6 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (LI) { // Try to reassociate loop invariant GEP chains to enable LICM. if (Loop *L = LI->getLoopFor(GEP.getParent())) { - // If SO1 is invariant and GO1 is variant, they should not be swapped by - // canonicalization even if it can be applied, otherwise it triggers - // LICM swapping in the next iteration, causing an infinite loop. - if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1)) - ShouldCanonicalizeSwap = false; - // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is // invariant: this breaks the dependence between GEPs and allows LICM // to hoist the invariant part out of the loop. @@ -1991,31 +1977,12 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, } } - // Canonicalize swapping. Swap GEP with constant index suffix to the back if - // it doesn't violate def-use relations or contradict with loop invariant - // swap above. This allows more potential applications of constant-indexed GEP - // optimizations below. 
- if (ShouldCanonicalizeSwap && Src->hasOneUse() && - Src->getPointerOperandType() == GEP.getPointerOperandType() && - Src->getType()->isVectorTy() == GEP.getType()->isVectorTy() && - !isa(Src->getPointerOperand())) { - // When swapping, GEP with all constant indices are more prioritized than - // GEP with only the last few indices (but not all) being constant because - // it may be merged with GEP with all constant indices. - if ((isa(*(Src->indices().end() - 1)) && - !isa(*(GEP.indices().end() - 1))) || - (Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) { - // Cannot guarantee inbounds after swapping because the non-const GEP can - // have arbitrary sign. - Value *NewSrc = Builder.CreateGEP( - GEP.getSourceElementType(), Src->getOperand(0), - SmallVector(GEP.indices()), Src->getName()); - GetElementPtrInst *NewGEP = GetElementPtrInst::Create( - Src->getSourceElementType(), NewSrc, - SmallVector(Src->indices()), GEP.getName()); - return NewGEP; - } - } + // Note that if our source is a gep chain itself then we wait for that + // chain to be resolved before we perform this transformation. This + // avoids us creating a TON of code in some cases. + if (auto *SrcGEP = dyn_cast(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) + return nullptr; // Wait until our source is folded to completion. // For constant GEPs, use a more general offset-based folding approach. // Only do this for opaque pointers, as the result element type may change. diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 21628b61edd62..9a9ac88b1c697 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -580,8 +580,8 @@ static bool LinearizeExprTree(Instruction *I, // expression. This means that it can safely be modified. See if we // can usefully morph it into an expression of the right kind. assert((!isa(Op) || - cast(Op)->getOpcode() != Opcode - || (isa(Op) && + cast(Op)->getOpcode() != Opcode || + (isa(Op) && !hasFPAssociativeFlags(cast(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 9e2fb6a27f5a4..81d151c2904e8 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -88,6 +88,8 @@ using BBPredicates = DenseMap; using PredMap = DenseMap; using BB2BBMap = DenseMap; +using BranchDebugLocMap = DenseMap; + // A traits type that is intended to be used in graph algorithms. The graph // traits starts at an entry node, and traverses the RegionNodes that are in // the Nodes set. 
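The StructurizeCFG hunks below thread a BranchDebugLocMap (TermDL) through the pass so that the branches the pass synthesizes inherit a debug location taken from the original terminators. One detail worth calling out there: getNextFlow copies the dominator's entry through a temporary because writing TermDL[Flow] = TermDL[Dominator] directly would evaluate DenseMap::operator[] on both sides, and the insertion for the new key can reallocate the map and invalidate the reference to the existing entry. A minimal illustration of that hazard, assuming only an LLVM build tree for the DenseMap header:

  #include "llvm/ADT/DenseMap.h"
  #include <string>
  #include <utility>

  void copyEntry(llvm::DenseMap<unsigned, std::string> &M) {
    // Potentially a use-after-free: the reference produced by M[1] on the
    // right-hand side can be invalidated when M[2] on the left-hand side
    // inserts a new entry and DenseMap reallocates its buckets.
    //   M[2] = M[1];

    // Safe: materialise the value first, then insert -- the same shape as
    // the temporary DebugLoc in getNextFlow.
    std::string Tmp = M[1];
    M[2] = std::move(Tmp);
  }

std::map and std::unordered_map keep references stable across insertions, but DenseMap stores its key/value pairs inline in a single buffer, so growth moves them.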
@@ -260,6 +262,8 @@ class StructurizeCFG { PredMap LoopPreds; BranchVector LoopConds; + BranchDebugLocMap TermDL; + RegionNode *PrevNode; void orderNodes(); @@ -541,6 +545,14 @@ void StructurizeCFG::collectInfos() { // Find the last back edges analyzeLoops(RN); } + + // Reset the collected term debug locations + TermDL.clear(); + + for (BasicBlock &BB : *Func) { + if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) + TermDL[&BB] = DL; + } } /// Insert the missing branch conditions @@ -828,7 +840,8 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, } else { BasicBlock *BB = Node->getNodeAs(); killTerminator(BB); - BranchInst::Create(NewExit, BB); + BranchInst *Br = BranchInst::Create(NewExit, BB); + Br->setDebugLoc(TermDL[BB]); addPhiValues(BB, NewExit); if (IncludeDominator) DT->changeImmediateDominator(NewExit, BB); @@ -843,6 +856,12 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); FlowSet.insert(Flow); + + // use a temporary variable to avoid a use-after-free if the map's storage is + // reallocated + DebugLoc DL = TermDL[Dominator]; + TermDL[Flow] = std::move(DL); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); return Flow; @@ -938,7 +957,9 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); // let it point to entry and next block - Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); + BranchInst *Br = BranchInst::Create(Entry, Next, BoolUndef, Flow); + Br->setDebugLoc(TermDL[Flow]); + Conditions.push_back(Br); addPhiValues(Flow, Entry); DT->changeImmediateDominator(Entry, Flow); @@ -977,8 +998,9 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed, // Create an extra loop end node LoopEnd = needPrefix(false); BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); - LoopConds.push_back(BranchInst::Create(Next, LoopStart, - BoolUndef, LoopEnd)); + BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolUndef, LoopEnd); + Br->setDebugLoc(TermDL[LoopEnd]); + LoopConds.push_back(Br); addPhiValues(LoopEnd, LoopStart); setPrevNode(Next); } @@ -1175,6 +1197,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { LoopPreds.clear(); LoopConds.clear(); FlowSet.clear(); + TermDL.clear(); return true; } diff --git a/llvm/test/Analysis/CallGraph/llvm-used.ll b/llvm/test/Analysis/CallGraph/llvm-used.ll deleted file mode 100644 index 09d4e95d594b4..0000000000000 --- a/llvm/test/Analysis/CallGraph/llvm-used.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s - -; The test will report used1 and used2 functions as used on the grounds -; of llvm.*.used references. Passing IgnoreLLVMUsed = true into the -; Function::hasAddressTaken() in the CallGraph::addToCallGraph() has to -; change their uses to zero. 
- -; CHECK: Call graph node <><<{{.*}}>> #uses=0 -; CHECK-NEXT: CS calls function 'used1' -; CHECK-NEXT: CS calls function 'used2' -; CHECK-NEXT: CS calls function 'unused' -; CHECK-EMPTY: -; CHECK-NEXT: Call graph node for function: 'unused'<<{{.*}}>> #uses=1 -; CHECK-EMPTY: -; CHECK-NEXT: Call graph node for function: 'used1'<<{{.*}}>> #uses=1 -; CHECK-EMPTY: -; CHECK-NEXT: Call graph node for function: 'used2'<<{{.*}}>> #uses=1 -; CHECK-EMPTY: - -@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @used1 to i8*)] -@llvm.compiler.used = appending global [1 x void()*] [void ()* @used2] -@array = appending global [1 x i8*] [i8* bitcast (void ()* @unused to i8*)] - -define internal void @used1() { -entry: - ret void -} - -define internal void @used2() { -entry: - ret void -} - -define internal void @unused() { -entry: - ret void -} diff --git a/llvm/test/Assembler/DIExpr.ll b/llvm/test/Assembler/DIExpr.ll index e38f773cdc46f..fe955b414fbb5 100644 --- a/llvm/test/Assembler/DIExpr.ll +++ b/llvm/test/Assembler/DIExpr.ll @@ -1,53 +1,82 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s ; RUN: verify-uselistorder %s +; CHECK: %t = type { i32, i32 } +%t = type { i32, i32 } +; CHECK: %u = type { %t, i32 } +%u = type { %t, i32 } + ; CHECK: !named = !{ +!named = !{ ; CHECK-SAME: !DIExpr(), +!DIExpr(), ; CHECK-SAME: !DIExpr(DIOpReferrer(i32)), +!DIExpr(DIOpReferrer(i32)), +; CHECK-SAME: !DIExpr(DIOpReferrer(%t)), +!DIExpr(DIOpReferrer(%t)), +; CHECK-SAME: !DIExpr(DIOpReferrer(%u)), +!DIExpr(DIOpReferrer(%u)), +; CHECK-SAME: !DIExpr(DIOpReferrer({ i16, float })), +!DIExpr(DIOpReferrer({ i16, float })), ; CHECK-SAME: !DIExpr(DIOpArg(0, i32), DIOpConvert(float)), +!DIExpr(DIOpArg(0, i32), DIOpConvert(float)), +; CHECK-SAME: !DIExpr(DIOpArg(0, %t), DIOpConvert(%u)), +!DIExpr(DIOpArg(0, %t), DIOpConvert(%u)), ; CHECK-SAME: !DIExpr(DIOpTypeObject(double)), +!DIExpr(DIOpTypeObject(double)), +; CHECK-SAME: !DIExpr(DIOpTypeObject(%t)), +!DIExpr(DIOpTypeObject(%t)), ; CHECK-SAME: !DIExpr(DIOpConstant(i8 1)), +!DIExpr(DIOpConstant(i8 1)), +; CHECK-SAME: !DIExpr(DIOpConstant(%u undef)), +!DIExpr(DIOpConstant(%u undef)), ; CHECK-SAME: !DIExpr(DIOpConvert(i16)), +!DIExpr(DIOpConvert(i16)), +; CHECK-SAME: !DIExpr(DIOpConvert(%t)), +!DIExpr(DIOpConvert(%t)), ; CHECK-SAME: !DIExpr(DIOpReinterpret(i64)), +!DIExpr(DIOpReinterpret(i64)), +; CHECK-SAME: !DIExpr(DIOpReinterpret(%t)), +!DIExpr(DIOpReinterpret(%t)), ; CHECK-SAME: !DIExpr(DIOpBitOffset(i1)), +!DIExpr(DIOpBitOffset(i1)), +; CHECK-SAME: !DIExpr(DIOpBitOffset(%u)), +!DIExpr(DIOpBitOffset(%u)), ; CHECK-SAME: !DIExpr(DIOpByteOffset(i16)), +!DIExpr(DIOpByteOffset(i16)), +; CHECK-SAME: !DIExpr(DIOpByteOffset(%t)), +!DIExpr(DIOpByteOffset(%t)), ; CHECK-SAME: !DIExpr(DIOpComposite(4, i8)), +!DIExpr(DIOpComposite(4, i8)), +; CHECK-SAME: !DIExpr(DIOpComposite(2, %u)), +!DIExpr(DIOpComposite(2, %u)), ; CHECK-SAME: !DIExpr(DIOpExtend(6)), +!DIExpr(DIOpExtend(6)), ; CHECK-SAME: !DIExpr(DIOpSelect()), +!DIExpr(DIOpSelect()), ; CHECK-SAME: !DIExpr(DIOpAddrOf(1)), +!DIExpr(DIOpAddrOf(1)), ; CHECK-SAME: !DIExpr(DIOpDeref(i32)), +!DIExpr(DIOpDeref(i32)), +; CHECK-SAME: !DIExpr(DIOpDeref(%t)), +!DIExpr(DIOpDeref(%t)), ; CHECK-SAME: !DIExpr(DIOpRead()), +!DIExpr(DIOpRead()), ; CHECK-SAME: !DIExpr(DIOpAdd()), +!DIExpr(DIOpAdd()), ; CHECK-SAME: !DIExpr(DIOpSub()), +!DIExpr(DIOpSub()), ; CHECK-SAME: !DIExpr(DIOpMul()), +!DIExpr(DIOpMul()), ; CHECK-SAME: !DIExpr(DIOpDiv()), +!DIExpr(DIOpDiv()), ; CHECK-SAME: !DIExpr(DIOpShr()), +!DIExpr(DIOpShr()), ; 
CHECK-SAME: !DIExpr(DIOpShl()), +!DIExpr(DIOpShl()), ; CHECK-SAME: !DIExpr(DIOpPushLane(i32)), +!DIExpr(DIOpPushLane(i32)), +; CHECK-SAME: !DIExpr(DIOpPushLane(%u)), +!DIExpr(DIOpPushLane(%u)), ; CHECK-SAME: !DIExpr()} - -!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22} - -!0 = !DIExpr() -!1 = !DIExpr(DIOpReferrer(i32)) -!2 = !DIExpr(DIOpArg(0, i32), DIOpConvert(float)) -!3 = !DIExpr(DIOpTypeObject(double)) -!4 = !DIExpr(DIOpConstant(i8 1)) -!5 = !DIExpr(DIOpConvert(i16)) -!6 = !DIExpr(DIOpReinterpret(i64)) -!7 = !DIExpr(DIOpBitOffset(i1)) -!8 = !DIExpr(DIOpByteOffset(i16)) -!9 = !DIExpr(DIOpComposite(4, i8)) -!10 = !DIExpr(DIOpExtend(6)) -!11 = !DIExpr(DIOpSelect()) -!12 = !DIExpr(DIOpAddrOf(1)) -!13 = !DIExpr(DIOpDeref(i32)) -!14 = !DIExpr(DIOpRead()) -!15 = !DIExpr(DIOpAdd()) -!16 = !DIExpr(DIOpSub()) -!17 = !DIExpr(DIOpMul()) -!18 = !DIExpr(DIOpDiv()) -!19 = !DIExpr(DIOpShr()) -!20 = !DIExpr(DIOpShl()) -!21 = !DIExpr(DIOpPushLane(i32)) -!22 = !DIExpr() +!DIExpr()} diff --git a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll index 32463af9f4377..90ca343a65096 100644 --- a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll @@ -58,7 +58,7 @@ entry: ; FUNC-LABEL: {{^}}null_32bit_lds_ptr: ; GFX7 v_cmp_ne_u32 -; GFX7: s_cselect_b32 +; GFX7: v_cndmask_b32 ; GFX8: s_cmp_lg_u32 ; GFX8-NOT: v_cmp_ne_u32 ; GFX8: s_cselect_b32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir index b72abbf557b36..f85c1802095b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -8,8 +8,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_MERGE_VALUES %0, %1 @@ -24,8 +24,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_MERGE_VALUES %0, %1 @@ -40,8 +40,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s128) = G_MERGE_VALUES %0, %1 @@ -56,8 +56,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) 
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s128) = G_MERGE_VALUES %0, %1 @@ -217,8 +217,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 1 @@ -234,8 +234,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_CONSTANT i32 1 @@ -292,8 +292,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 @@ -308,8 +308,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 @@ -369,8 +369,8 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -388,8 +388,8 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 6f866ea478569..6b765986cda78 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -56,10 +56,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) 
= COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll index ac1303ba888c9..af429e85e8c1b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll @@ -7,11 +7,12 @@ define i32 addrspace(1)* @call_assert_align() { ; CHECK-LABEL: call_assert_align: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[16:17] -; CHECK-NEXT: v_writelane_b32 v40, s33, 2 +; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v40, s16, 2 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 @@ -26,11 +27,12 @@ define i32 addrspace(1)* @call_assert_align() { ; CHECK-NEXT: global_store_dword v[0:1], v2, off ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: v_readlane_b32 s4, v40, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_addk_i32 s32, 0xfc00 -; CHECK-NEXT: v_readlane_b32 s33, v40, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll index a76e5360856ac..33a6b1d0dc898 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll @@ -9,27 +9,27 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], 
[[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -40,29 +40,29 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], 
%subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -73,29 +73,29 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; 
GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -106,33 +106,33 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, 
[[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll index b0dbffeb5c9c2..f451bc751b96f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll @@ -8,29 +8,29 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %ret @@ -41,31 +41,31 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, 
[[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: 
$vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %ret @@ -76,31 +76,31 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = 
COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret float %ret @@ -111,35 +111,35 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; 
GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll index 5b3180bb3dbf1..cc379eaf32b95 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll @@ -7,16 +7,16 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i3 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; 
GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -27,17 +27,17 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -48,17 +48,17 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile 
dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -69,19 +69,19 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i3 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -92,22 +92,22 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY 
$vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret @@ -118,23 +118,23 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> ; GFX90A_GFX940: bb.1 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; 
GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret @@ -145,23 +145,23 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret @@ -172,25 +172,25 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll index e4a7dcbf75420..435a5cfb18389 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -8,27 +8,27 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -39,29 +39,29 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN 
[[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -72,29 +72,29 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -105,33 +105,33 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 
= COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll index 6688f3cbcf9b4..a978255190034 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll @@ -7,15 +7,15 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -26,16 +26,16 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret @@ -46,16 +46,16 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -66,18 +66,18 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = 
BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir index daf7b3a08dc6b..bbad495b9e9e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -33,13 +33,13 @@ body: | ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[PRED_COPY]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -69,13 +69,13 @@ body: | ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[PRED_COPY]](s32), 
[[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index 41ac9f96d1b78..b9002145cafcf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -236,11 +236,12 @@ define void @func_caller_stack() { ; MUBUF-LABEL: func_caller_stack: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v40, s33, 2 +; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 @@ -258,22 +259,24 @@ define void @func_caller_stack() { ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 -; MUBUF-NEXT: v_readlane_b32 s33, v40, 2 -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: func_caller_stack: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: v_writelane_b32 v40, s33, 2 +; FLATSCR-NEXT: s_mov_b32 s0, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 +; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 +; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 @@ -291,11 +294,12 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 +; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 +; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload +; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: s_add_i32 s32, s32, -16 -; FLATSCR-NEXT: v_readlane_b32 s33, v40, 2 -; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] +; FLATSCR-NEXT: s_mov_b32 s33, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> undef, <16 x i32> undef, <4 x i32> ) @@ -306,11 +310,12 @@ define void 
@func_caller_byval([16 x i32] addrspace(5)* %argptr) { ; MUBUF-LABEL: func_caller_byval: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v40, s33, 2 +; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 @@ -375,22 +380,24 @@ define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 -; MUBUF-NEXT: v_readlane_b32 s33, v40, 2 -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: func_caller_byval: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: v_writelane_b32 v40, s33, 2 +; FLATSCR-NEXT: s_mov_b32 s0, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 +; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 +; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 @@ -424,11 +431,12 @@ define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 +; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 +; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload +; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: s_add_i32 s32, s32, -16 -; FLATSCR-NEXT: v_readlane_b32 s33, v40, 2 -; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] +; FLATSCR-NEXT: s_mov_b32 s33, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %cast = bitcast [16 x i32] addrspace(5)* %argptr to i8 addrspace(5)* diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index 649d9a4d5966d..fc4f9c2d861f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -7,44 +7,43 @@ declare void @callee() define amdgpu_kernel void @call_debug_loc() { ; CHECK-LABEL: name: call_debug_loc ; CHECK: bb.1.entry: - ; 
CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY7]], debug-location !6 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !6 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY6]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY5]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY4]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY3]], debug-location !6 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY14]], [[PRED_COPY1]], implicit $exec, debug-location !6 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = 
S_MOV_B32 20 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6 - ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[PRED_COPY]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[PRED_COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]], debug-location !6 + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]], debug-location !6 + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]], debug-location !6 + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY8]], debug-location !6 + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]], debug-location !6 + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]], debug-location !6 + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]], debug-location !6 + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]], debug-location !6 + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]], debug-location !6 + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[V_OR3_B32_e64_]], debug-location !6 ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6 @@ -60,12 +59,12 @@ define void @returnaddress_debug_loc(i8* addrspace(1)* %ptr) { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31, debug-location !6 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[PRED_COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: %returnaddr = call i8* @llvm.returnaddress(i32 0), !dbg !6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll index 5dfde116785db..39254af35642f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s +; RUN: llc -global-isel -amdgpu-enable-remove-incompatible-functions=0 -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s ; Make sure legalizer info doesn't assert on dummy targets @@ -8,11 +8,11 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -23,13 +23,13 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -40,7 +40,7 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: 
$vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add @@ -51,9 +51,9 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add @@ -64,12 +64,12 @@ define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY1]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ADD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll index 11ba9bc6d2319..621c52b932ff8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -73,7 +73,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -84,6 +83,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -99,7 +99,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -110,6 +109,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -125,7 +125,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: 
s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -137,6 +136,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: s_add_i32 s32, s32, -16 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, i32 addrspace(4)* @gv, align 4 @@ -213,7 +213,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -224,6 +223,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -239,7 +239,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -250,6 +249,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -265,7 +265,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -277,6 +276,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: s_add_i32 s32, s32, -16 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, i32 addrspace(4)* @gv, align 16 @@ -357,7 +357,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -369,6 +368,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX9-NEXT: s_addk_i32 s32, 0xf000 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -385,7 +385,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword 
s4, s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -397,6 +396,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -414,7 +414,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -427,6 +426,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX11-NEXT: s_addk_i32 s32, 0xffc0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xfffffc00 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, i32 addrspace(4)* @gv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 5eb4fe06dd1f5..390d214ce6f43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -348,9 +348,9 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(16) ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %vec = load <128 x i16>, <128 x i16> addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index d71b58f484a42..d2a6749f855d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -7,21 +7,21 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: 
S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) ret void @@ -32,23 +32,23 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %d ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) ret float %ret @@ -59,21 +59,21 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic ret void @@ -84,23 +84,23 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %d ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN 
[[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index 4ae1c5647e690..fd9fac64a49b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -7,12 +7,12 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, doubl ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) @@ -24,19 +24,19 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) ret double %ret @@ -47,12 +47,12 
@@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, doubl ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic @@ -64,19 +64,19 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index 94c8fd34926e5..5b1a79827de39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -6,11 +6,11 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) ret void @@ -21,12 +21,12 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %p ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 75b2d1969673b..b09f23810620b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -9,7 +9,7 @@ define i1 @i1_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val @@ -21,7 +21,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val @@ -33,7 +33,7 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val @@ -45,7 +45,7 @@ define i7 @i7_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val @@ -57,7 +57,7 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val @@ -69,7 +69,7 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK-NEXT: 
$vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val @@ -81,7 +81,7 @@ define i8 @i8_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val @@ -93,7 +93,7 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val @@ -105,7 +105,7 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val @@ -117,7 +117,7 @@ define i16 @i16_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val @@ -129,7 +129,7 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val @@ -141,7 +141,7 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val @@ -153,7 +153,7 @@ define half @f16_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val @@ -165,7 
+165,7 @@ define i24 @i24_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val @@ -177,7 +177,7 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val @@ -189,7 +189,7 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val @@ -203,8 +203,8 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val @@ -219,9 +219,9 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val @@ -232,7 +232,7 @@ define i32 @i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val @@ -245,8 +245,8 @@ define i48 @i48_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val @@ -259,8 +259,8 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val @@ -273,8 +273,8 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val @@ -286,8 +286,8 @@ define i64 @i64_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val @@ -300,9 +300,9 @@ define i65 @i65_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val @@ -315,9 +315,9 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val @@ -330,9 +330,9 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val @@ -343,7 +343,7 @@ define float @f32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val @@ -355,8 +355,8 @@ define double @f64_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val @@ -368,10 +368,10 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val @@ -383,8 +383,8 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val @@ -396,9 +396,9 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val @@ -410,10 +410,10 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val @@ -425,11 +425,11 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val @@ -442,14 +442,14 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr @@ -463,22 +463,22 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load 
volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr @@ -492,38 +492,38 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY 
[[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr @@ -536,10 +536,10 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val @@ -552,12 +552,12 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr @@ -571,14 +571,14 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr @@ -592,16 +592,16 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, 
implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr @@ -615,22 +615,22 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr @@ -644,38 +644,38 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), 
[[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr @@ -687,7 +687,7 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val @@ -698,7 +698,7 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val @@ -713,8 +713,8 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val @@ -726,8 +726,8 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val @@ -739,8 +739,8 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY 
[[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val @@ -756,9 +756,9 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr @@ -772,10 +772,10 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr @@ -789,14 +789,14 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: 
$vgpr4 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr @@ -827,37 +827,37 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT16]](s32) ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT17]](s32) ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT18]](s32) ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT19]](s32) ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT20]](s32) ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT21]](s32) ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT22]](s32) ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT23]](s32) ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[ANYEXT24]](s32) ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[ANYEXT25]](s32) ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[ANYEXT26]](s32) ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[ANYEXT27]](s32) ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[ANYEXT28]](s32) ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[ANYEXT29]](s32) ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[ANYEXT30]](s32) ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[ANYEXT31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit 
$vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -873,9 +873,9 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val @@ -891,11 +891,11 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val @@ -913,13 +913,13 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT6]](s32) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr @@ -935,8 +935,8 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* 
undef ret { i8, i32 } %val @@ -947,14 +947,14 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[PRED_COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, i8 addrspace(1)* undef @@ -975,11 +975,11 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[PRED_COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr @@ -991,18 +991,18 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: 
[[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p1) = PRED_COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[PRED_COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[PRED_COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep @@ -1014,16 +1014,16 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[PRED_COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef @@ -1036,16 +1036,16 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PRED_COPY]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: SI_RETURN %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef @@ -1071,10 +1071,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef @@ -1093,7 +1093,7 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY [[DEF]](p3) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -1102,15 +1102,15 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load 
volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef @@ -1130,12 +1130,12 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[PRED_COPY]](p5) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) @@ -1162,38 +1162,38 @@ define i1022 @i1022_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + 
; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val @@ -1206,38 +1206,38 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; 
CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val @@ -1250,38 +1250,38 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 
addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; 
CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val @@ -1294,7 +1294,7 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 @@ -1306,15 +1306,15 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[PRED_COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) ; CHECK-NEXT: SI_RETURN %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 58db5db52a475..94477dc54c69f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -9,21 +9,21 @@ define 
amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1 ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret void @@ -34,23 +34,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrs ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: 
GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret void @@ -61,21 +61,21 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrsp ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret void @@ -86,23 +86,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], 
[[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret void @@ -113,21 +113,21 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1 ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret void @@ -138,23 +138,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrs ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; 
GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index 0e97b51801b97..c2eb193d6665d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -8,23 +8,23 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret float %ret @@ -35,25 +35,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspa ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on 
%ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret float %ret @@ -64,23 +64,23 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspac ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret float %ret @@ -91,25 +91,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float ad ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret float %ret @@ -120,23 +120,23 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret float %ret @@ -147,25 +147,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspa ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], 
[[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll index 32f33683f0f5c..9589899ff9967 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -7,12 +7,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace( ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -24,19 +24,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], 
%subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -47,12 +47,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addr ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -65,20 +65,20 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrs ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -89,12 +89,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrs ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -106,19 +106,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrsp ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* 
%ptr, double %data) ret double %ret @@ -129,12 +129,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -147,20 +147,20 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -171,12 +171,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace( ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic @@ -188,19 +188,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1 ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + 
; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret double %ret @@ -211,12 +211,12 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addr ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit 
$exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 @@ -229,20 +229,20 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrs ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 6fe7eef2b10d1..d1779aefe52ba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -8,21 +8,21 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> 
addr ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -33,23 +33,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) 
+ ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -60,21 +60,21 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile 
dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -85,23 +85,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x 
half> addrspace(1)* %ptr, <2 x half> %data) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index 3ee4d957a572b..a135457214d66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -7,12 +7,12 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> a ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -23,13 +23,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x h ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], 
[[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -40,12 +40,12 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x ha ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[PRED_COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -56,13 +56,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(< ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = 
GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[PRED_COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index 0812972204354..aa09eb2288a20 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -19,8 +19,8 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } @@ -31,8 +31,8 @@ define i32 addrspace(1)* @external_global_got() { ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } @@ -43,8 +43,8 @@ define i32 addrspace(999)* @external_other_got() { ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @external_other } @@ -54,8 +54,8 @@ define i32 addrspace(4)* @internal_constant_pcrel() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = 
COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @internal_constant } @@ -65,8 +65,8 @@ define i32 addrspace(1)* @internal_global_pcrel() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @internal_global } @@ -76,8 +76,8 @@ define i32 addrspace(999)* @internal_other_pcrel() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @internal_other } @@ -88,7 +88,7 @@ define i32 addrspace(6)* @external_constant32_got() { ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 - ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 ret i32 addrspace(6)* @external_constant32 } @@ -98,7 +98,7 @@ define i32 addrspace(6)* @internal_constant32_pcrel() { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 ret i32 addrspace(6)* @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index a6be8956dbcd7..e158985a21923 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -7,9 +7,11 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:48 ; 4-byte 
Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 +; CHECK-NEXT: ; implicit-def: $vgpr8 ; CHECK-NEXT: v_mov_b32_e32 v14, v1 ; CHECK-NEXT: v_mov_b32_e32 v13, v2 ; CHECK-NEXT: v_mov_b32_e32 v12, v3 @@ -33,15 +35,22 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v16, s4, 0 -; CHECK-NEXT: v_writelane_b32 v16, s5, 1 -; CHECK-NEXT: v_writelane_b32 v16, s6, 2 -; CHECK-NEXT: v_writelane_b32 v16, s7, 3 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_writelane_b32 v0, s4, 0 +; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: v_writelane_b32 v0, s6, 2 +; CHECK-NEXT: v_writelane_b32 v0, s7, 3 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 @@ -49,8 +58,15 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v16, s4, 4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_writelane_b32 v0, s4, 4 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -77,6 +93,9 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_readfirstlane_b32 s6, v2 ; CHECK-NEXT: v_readfirstlane_b32 s5, v1 ; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; CHECK-NEXT: s_mov_b32 s13, s10 ; CHECK-NEXT: s_mov_b32 s14, s9 @@ -85,14 +104,18 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_mov_b32 s17, s6 ; CHECK-NEXT: s_mov_b32 s18, s5 ; CHECK-NEXT: s_mov_b32 s19, s4 -; CHECK-NEXT: v_writelane_b32 v16, s12, 5 -; CHECK-NEXT: v_writelane_b32 v16, s13, 6 -; CHECK-NEXT: v_writelane_b32 v16, s14, 7 -; CHECK-NEXT: v_writelane_b32 v16, s15, 8 -; CHECK-NEXT: v_writelane_b32 v16, s16, 9 -; CHECK-NEXT: v_writelane_b32 v16, s17, 10 -; CHECK-NEXT: v_writelane_b32 
v16, s18, 11 -; CHECK-NEXT: v_writelane_b32 v16, s19, 12 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_writelane_b32 v0, s12, 5 +; CHECK-NEXT: v_writelane_b32 v0, s13, 6 +; CHECK-NEXT: v_writelane_b32 v0, s14, 7 +; CHECK-NEXT: v_writelane_b32 v0, s15, 8 +; CHECK-NEXT: v_writelane_b32 v0, s16, 9 +; CHECK-NEXT: v_writelane_b32 v0, s17, 10 +; CHECK-NEXT: v_writelane_b32 v0, s18, 11 +; CHECK-NEXT: v_writelane_b32 v0, s19, 12 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: v_mov_b32_e32 v6, v8 ; CHECK-NEXT: v_mov_b32_e32 v7, v9 ; CHECK-NEXT: v_mov_b32_e32 v4, v10 @@ -111,25 +134,36 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3] ; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v16, s4, 13 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_writelane_b32 v0, s4, 13 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: v_readlane_b32 s4, v16, 13 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readlane_b32 s4, v2, 13 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s8, v16, 5 -; CHECK-NEXT: v_readlane_b32 s9, v16, 6 -; CHECK-NEXT: v_readlane_b32 s10, v16, 7 -; CHECK-NEXT: v_readlane_b32 s11, v16, 8 -; CHECK-NEXT: v_readlane_b32 s12, v16, 9 -; CHECK-NEXT: v_readlane_b32 s13, v16, 10 -; CHECK-NEXT: v_readlane_b32 s14, v16, 11 -; CHECK-NEXT: v_readlane_b32 s15, v16, 12 -; CHECK-NEXT: v_readlane_b32 s16, v16, 0 -; CHECK-NEXT: v_readlane_b32 s17, v16, 1 -; CHECK-NEXT: v_readlane_b32 s18, v16, 2 -; CHECK-NEXT: v_readlane_b32 s19, v16, 3 +; CHECK-NEXT: v_readlane_b32 s8, v2, 5 +; CHECK-NEXT: v_readlane_b32 s9, v2, 6 +; CHECK-NEXT: v_readlane_b32 s10, v2, 7 +; CHECK-NEXT: v_readlane_b32 s11, v2, 8 +; CHECK-NEXT: v_readlane_b32 s12, v2, 9 +; CHECK-NEXT: v_readlane_b32 s13, v2, 10 +; CHECK-NEXT: v_readlane_b32 s14, v2, 11 +; CHECK-NEXT: v_readlane_b32 s15, v2, 12 +; CHECK-NEXT: v_readlane_b32 s16, v2, 0 +; CHECK-NEXT: v_readlane_b32 s17, v2, 1 +; CHECK-NEXT: v_readlane_b32 s18, v2, 2 +; CHECK-NEXT: v_readlane_b32 s19, v2, 3 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) @@ -137,16 +171,25 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: v_readlane_b32 s4, v16, 4 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readlane_b32 s4, 
v0, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: ; kill: killed $vgpr4 +; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 28865f55ec5f3..cdc3f2fd15964 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) { ; GFX8V3-LABEL: addrspacecast: @@ -476,9 +476,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V3: ; %bb.0: ; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: s_waitcnt vmcnt(0) ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] 
offset:8 glc -; GFX9V3-NEXT: s_waitcnt vmcnt(0) ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; GFX9V3-NEXT: s_waitcnt vmcnt(0) @@ -495,9 +493,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V4-NEXT: s_waitcnt vmcnt(0) ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V4-NEXT: s_waitcnt vmcnt(0) ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; GFX9V4-NEXT: s_waitcnt vmcnt(0) @@ -515,9 +511,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V5-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V5-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 -; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc -; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: v_mov_b32_e32 v0, s8 @@ -546,3 +540,6 @@ declare i1 @llvm.amdgcn.is.shared(i8*) declare i1 @llvm.amdgcn.is.private(i8*) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll index 15b92df2f1601..530b5a84510ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll @@ -51,9 +51,9 @@ define i32 @return_type_is_too_small_scalar() { ; CHECK-LABEL: name: return_type_is_too_small_scalar ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TRUNC]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" () ret i32 %reg @@ -88,16 +88,16 @@ define void @use_vector_too_small(<8 x i32> %arg) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[BUILD_VECTOR]](<8 x s32>) ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: SI_RETURN call void asm sideeffect "; use $0", "{v[0:7]}"(<8 x i32> %arg) @@ -110,15 +110,15 @@ define void @use_vector_too_big(<8 x i32> %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg) @@ -131,9 +131,9 @@ define void @use_scalar_too_small(i64 %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(i64 %arg) @@ -145,9 +145,9 @@ define void @use_scalar_too_big(i32 %arg) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[PRED_COPY]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[ANYEXT]](s64) ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1 ; CHECK-NEXT: SI_RETURN call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg) @@ -160,9 +160,9 @@ define void @use_pointer_too_small(i8 addrspace(1)* %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg) @@ -175,7 +175,7 @@ define void @use_pointer_too_big(i32 addrspace(3)* %arg) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir index 02e96bc04729a..d18329e52ef21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir @@ -17,13 +17,13 @@ body: | ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_ss ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit %smax ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax %src0:sgpr(s32) = COPY $sgpr0 @@ -46,13 +46,13 @@ body: | ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_ss_commute ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit %smax ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_ss_commute ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: %src0:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: %smax:sreg_32 = S_ABS_I32 %src0, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit %smax %src0:sgpr(s32) = COPY $sgpr0 @@ -75,7 +75,7 @@ body: | ; GFX6-LABEL: name: smax_neg_abs_pattern_s32_vv ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %src0:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: %src0:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: %ineg:vgpr_32, dead %4:sreg_64_xexec = V_SUB_CO_U32_e64 
%zero, %src0, 0, implicit $exec ; GFX6-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec @@ -83,7 +83,7 @@ body: | ; GFX9-LABEL: name: smax_neg_abs_pattern_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %src0:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: %src0:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: %ineg:vgpr_32 = V_SUB_U32_e64 %zero, %src0, 0, implicit $exec ; GFX9-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index e395c90018e3d..b066c7f71da11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -18,24 +18,24 @@ body: | ; GFX6-LABEL: name: add_s32 ; GFX6: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec - ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_2]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_4]] ; GFX9-LABEL: name: add_s32 ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[S_ADD_I32_]], 0, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,14 +73,14 @@ body: | ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], 64, implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], 64, implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 -64 @@ -102,14 +102,14 @@ body: | ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64 = V_SUB_CO_U32_e64 [[COPY]], 64, 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[PRED_COPY]], 64, 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]] ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -64 @@ -131,16 +131,16 @@ body: | ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], 
implicit-def $scc + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 16 @@ -162,16 +162,16 @@ body: | ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir index 35a336755bc6a..6dce1b5d9e30f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -19,16 +19,16 @@ body: | ; GFX6-LABEL: name: add_s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -52,16 +52,16 @@ body: | ; GFX6-LABEL: name: add_s16_zext_to_s32 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec + 
; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] ; GFX10-LABEL: name: add_s16_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ADD_NC_U16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -87,14 +87,14 @@ body: | ; GFX6-LABEL: name: add_s16_neg_inline_const_64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[PRED_COPY]], 0, 64, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -117,14 +117,14 @@ body: | ; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[PRED_COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[PRED_COPY]], 0, 64, 0, 0, implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_SUB_NC_U16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir index 675537a901bcc..e27878a2af329 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir @@ -14,16 +14,16 @@ body: | ; WAVE64-LABEL: name: class_s32_vcc_sv ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -43,16 +43,16 @@ body: | ; WAVE64-LABEL: name: class_s32_vcc_vs ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -72,16 +72,16 @@ body: | ; WAVE64-LABEL: name: class_s32_vcc_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, 
implicit [[V_CMP_CLASS_F32_e64_]] ; WAVE32-LABEL: name: class_s32_vcc_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -101,16 +101,16 @@ body: | ; WAVE64-LABEL: name: class_s64_vcc_sv ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_sv ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -131,16 +131,16 @@ body: | ; WAVE64-LABEL: name: class_s64_vcc_vs ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_vs ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; 
WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -161,16 +161,16 @@ body: | ; WAVE64-LABEL: name: class_s64_vcc_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] ; WAVE32-LABEL: name: class_s64_vcc_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F64_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir index a5c3690b3ecb8..ed4a1aa711e25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir @@ -21,18 +21,18 @@ body: | liveins: $sgpr0, $vgpr0 ; WAVE32-LABEL: name: class_s16_vcc_sv ; WAVE32: liveins: $sgpr0, $vgpr0 - ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_sv ; WAVE64: liveins: $sgpr0, $vgpr0 - ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 @@ -49,18 +49,18 @@ body: | liveins: $sgpr0, $vgpr0 ; WAVE32-LABEL: name: class_s16_vcc_vs ; WAVE32: liveins: $sgpr0, 
$vgpr0 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_vs ; WAVE64: liveins: $sgpr0, $vgpr0 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:sgpr(s32) = PRED_COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 @@ -77,18 +77,18 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE32-LABEL: name: class_s16_vcc_vv ; WAVE32: liveins: $vgpr0, $vgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] ; WAVE64-LABEL: name: class_s16_vcc_vv ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 S_ENDPGM 0, implicit %4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir index 0443314158f0f..90a7453ca589c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir @@ -15,9 +15,9 @@ body: | ; CHECK-LABEL: name: cos_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: cos_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir index 2cf44526e30d4..cda23e5d2a6e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir @@ -17,10 +17,10 @@ body: | ; CHECK-LABEL: name: cos_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ body: | ; CHECK-LABEL: name: cos_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir index 1397b9d83854f..79cdf762ee0db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: cvt_pk_i16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec 
; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: cvt_pk_i16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ body: | ; GCN-LABEL: name: cvt_pk_i16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir index 6c1dd9cdf6b6f..3ffb76695f2b2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: cvt_pk_u16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: cvt_pk_u16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ body: | ; GCN-LABEL: name: cvt_pk_u16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 
[[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir index 6ebb2669fe6d7..1f3401db20567 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_i16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_i16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_i16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir index 74a169fe1edec..03c9490526390 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_u16_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 
= COPY $vgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_u16_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -59,9 +59,9 @@ body: | ; GCN-LABEL: name: cvt_pknorm_u16_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir index 120095124c700..c2dbf6c61aa8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir @@ -14,10 +14,10 @@ body: | ; GCN-LABEL: name: cvt_pkrtz_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -37,10 +37,10 @@ body: | ; GCN-LABEL: name: cvt_pkrtz_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit 
$mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -59,10 +59,10 @@ body: | ; GCN-LABEL: name: cvt_pkrtz_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir index f59cde5744e48..90145ed1a770f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ds.swizzle.mir @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: ds_swizzle_0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[PRED_COPY]], 0, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 @@ -38,8 +38,8 @@ body: | ; CHECK-LABEL: name: ds_swizzle_65535 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[PRED_COPY]], 65535, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir index 7928fcc10ff83..4f2150179976e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir @@ -17,10 +17,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -42,10 +42,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -67,10 +67,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vvsv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -92,11 +92,11 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vvvs ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -120,9 +120,9 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vssv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -143,10 +143,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vsvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -167,10 +167,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vvss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -191,9 +191,9 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vsss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0 @@ -232,10 +232,10 @@ body: | ; GCN-LABEL: name: fmad_ftz_s32_vvv_fneg_v ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = 
V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir index 0d56297ff0819..ddca7f67b2d38 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -14,10 +14,10 @@ body: | ; GCN-LABEL: name: fmed3_s32_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -39,10 +39,10 @@ body: | ; GCN-LABEL: name: fmed3_s32_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -64,10 +64,10 @@ body: | ; GCN-LABEL: name: fmed3_s32_vvsv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -89,10 +89,10 @@ body: 
| ; GCN-LABEL: name: fmed3_s32_vvvs ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -116,9 +116,9 @@ body: | ; GCN-LABEL: name: fmed3_s32_vssv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -139,9 +139,9 @@ body: | ; GCN-LABEL: name: fmed3_s32_vsvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -162,9 +162,9 @@ body: | ; GCN-LABEL: name: fmed3_s32_vvss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -185,8 +185,8 @@ body: | ; GCN-LABEL: name: fmed3_s32_vsss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = 
V_MED3_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MED3_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir index 42fd589ec74b6..ae4c0c802c7c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -18,14 +18,14 @@ body: | ; GCN-LABEL: name: fmed3_s16_vvvv ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = COPY $vgpr2 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 + %2:vgpr(s32) = PRED_COPY $vgpr2 %3:vgpr(s16) = G_TRUNC %0 %4:vgpr(s16) = G_TRUNC %1 %5:vgpr(s16) = G_TRUNC %2 @@ -45,14 +45,14 @@ body: | ; GCN-LABEL: name: fmed3_s16_vsvv ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %6 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 - %2:vgpr(s32) = COPY $vgpr1 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 + %2:vgpr(s32) = PRED_COPY $vgpr1 %3:sgpr(s16) = G_TRUNC %0 %4:vgpr(s16) = G_TRUNC %1 %5:vgpr(s16) = G_TRUNC %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir index 086108983d138..0fdf1c0b71425 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir @@ -15,9 +15,9 @@ body: | ; CHECK-LABEL: name: fract_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: fract_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: fract_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ body: | ; CHECK-LABEL: name: fract_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir index 7a3af82260748..2c56df988c364 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir @@ -18,10 +18,10 @@ body: | ; CHECK-LABEL: name: fract_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 S_ENDPGM 0, implicit %2 @@ -39,10 +39,10 @@ body: | ; CHECK-LABEL: name: fract_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = 
COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir index f107a4b3b7a42..025117b9d8c78 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir @@ -13,10 +13,10 @@ body: | ; GCN-LABEL: name: ldexp_s32_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -35,10 +35,10 @@ body: | ; GCN-LABEL: name: ldexp_s32_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -57,10 +57,10 @@ body: | ; GCN-LABEL: name: ldexp_s32_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -79,10 +79,10 @@ body: | ; GCN-LABEL: name: ldexp_s64_vsv ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = 
nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -101,10 +101,10 @@ body: | ; GCN-LABEL: name: ldexp_s64_vvs ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 @@ -123,10 +123,10 @@ body: | ; GCN-LABEL: name: ldexp_s64_vvv ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_LDEXP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir index 24d35a7887e5f..985fa16fd57ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir @@ -17,12 +17,12 @@ body: | liveins: $sgpr0, $vgpr0 ; GCN-LABEL: name: ldexp_s16_vsv ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = COPY $vgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 @@ -39,12 +39,12 @@ body: | liveins: $sgpr0, $vgpr0 ; GCN-LABEL: name: ldexp_s16_vvs ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:sgpr(s32) = PRED_COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 @@ -61,12 +61,12 @@ body: | liveins: $vgpr0, $vgpr1 ; GCN-LABEL: name: ldexp_s16_vvv ; GCN: liveins: $vgpr0, $vgpr1 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN: %3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %0:vgpr(s32) = PRED_COPY $vgpr0 + %1:vgpr(s32) = PRED_COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), %2, %1 S_ENDPGM 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir index e9f8155528c45..a048d86d2c7cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mbcnt.lo.mir @@ -27,9 +27,9 @@ body: | ; GCN-LABEL: name: mbcnt_lo_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -48,9 +48,9 @@ body: | ; GCN-LABEL: name: smin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -69,9 +69,9 @@ body: | ; GCN-LABEL: name: smin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MBCNT_LO_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir index 7c93e5433924d..896d657211ee0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir @@ -13,9 +13,9 @@ body: | ; GCN-LABEL: name: mul_u24_vsv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ body: | ; GCN-LABEL: name: mul_u24_vvs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ body: | ; GCN-LABEL: name: mul_u24_vvv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir index f9ec4fa7bba41..c1a8aaaf9300c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir @@ -13,9 +13,9 @@ body: | ; CHECK-LABEL: name: mulhi_i24_vsv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], 
implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: mulhi_i24_vvs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: mulhi_i24_vvv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir index 7d4a269d362e6..f9c2cb0cc78d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir @@ -13,9 +13,9 @@ body: | ; CHECK-LABEL: name: mulhi_u24_vsv ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: mulhi_u24_vvs ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: mulhi_u24_vvv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY $vgpr1 - ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir index 03402025c8f30..587c0aeb264ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir @@ -19,10 +19,10 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RCP_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 S_ENDPGM 0, implicit %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir index 590fed59f34f5..72f820bca8d2f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir @@ -15,9 +15,9 @@ body: | ; CHECK-LABEL: name: rcp_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: rcp_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: rcp_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ body: | ; CHECK-LABEL: name: rcp_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RCP_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir index c4cec17cc7251..ef234c5f5329a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir @@ -17,10 +17,10 @@ body: | ; CHECK-LABEL: name: rcp_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept 
V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ body: | ; CHECK-LABEL: name: rcp_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir index d5c27a36c789b..1f86926d94678 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.readfirstlane.mir @@ -16,8 +16,8 @@ body: | ; GCN-LABEL: name: readfirstlane_v ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 @@ -35,8 +35,8 @@ body: | ; GCN-LABEL: name: readfirstlane_v_imm ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 123, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[PRED_COPY]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]] %0:vgpr(s32) = G_CONSTANT i32 123 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir index 471aa53d9eb5e..79acefd186d8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir @@ -22,7 +22,7 @@ body: | ; GCN-LABEL: name: reloc_constant_sgpr32 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @arst - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr0 %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 $sgpr0 = COPY %0 @@ -40,7 +40,7 @@ body: | ; GCN-LABEL: name: reloc_constant_vgpr32 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @arst, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY 
[[V_MOV_B32_e32_]] ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0 %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 $vgpr0 = COPY %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir index cb8f47b1f1501..c375080f30234 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir @@ -19,10 +19,10 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_CLAMP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir index 0b952a17dadfb..0cc0d7cf4a5fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir @@ -19,10 +19,10 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 S_ENDPGM 0, implicit %1 ... @@ -39,10 +39,10 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %1:vgpr_32 = nofpexcept V_RSQ_LEGACY_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %1 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 S_ENDPGM 0, implicit %1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir index cadd7efdd3689..1d1b2de009daf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir @@ -15,9 +15,9 @@ body: | ; CHECK-LABEL: name: rsq_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: rsq_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -57,9 +57,9 @@ body: | ; CHECK-LABEL: name: rsq_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 @@ -78,9 +78,9 @@ body: | ; CHECK-LABEL: name: rsq_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RSQ_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir index af5ed0ac9613b..48bd4f306592f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir @@ -17,10 +17,10 @@ body: | ; CHECK-LABEL: name: rsq_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept 
V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ body: | ; CHECK-LABEL: name: rsq_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir index 1c14b2c395776..47a8460c79a86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir @@ -14,8 +14,8 @@ body: | ; GCN-LABEL: name: test_sendmsg ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $m0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $m0 = COPY [[PRED_COPY]] ; GCN-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0 ; GCN-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir index f3f975f95ba84..6836b3f8e633c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sffbh.mir @@ -14,8 +14,8 @@ body: | ; CHECK-LABEL: name: sffbh_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FLBIT_I32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 @@ -35,8 +35,8 @@ body: | ; CHECK-LABEL: name: sffbh_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: sffbh_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_I32_e64_:%[0-9]+]]:vgpr_32 = 
V_FFBH_I32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir index fb55d6380b504..e5b368561a7a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir @@ -15,9 +15,9 @@ body: | ; CHECK-LABEL: name: sin_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 S_ENDPGM 0, implicit %1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: sin_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir index c0ac731d9769c..ec7182308f3c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir @@ -17,10 +17,10 @@ body: | ; CHECK-LABEL: name: sin_s16_vs ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] - %0:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s32) = PRED_COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 S_ENDPGM 0, implicit %2 @@ -38,10 +38,10 @@ body: | ; CHECK-LABEL: name: sin_s16_vv ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK: %2:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit %2 - %0:vgpr(s32) = COPY $vgpr0 + %0:vgpr(s32) = PRED_COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %1 S_ENDPGM 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir index dc568806ddd80..0af5146845e6d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -17,39 +17,39 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -71,59 +71,59 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -147,39 +147,39 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store 
seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY 
$vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -201,59 +201,59 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; 
GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -277,79 +277,79 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; 
GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX9-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 
[[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + 
; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = 
COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -373,35 +373,35 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: 
FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -422,35 +422,35 @@ body: | ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir index 61a1793d88b4b..19019866def8b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -19,69 +19,69 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = 
BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, 
$vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = 
PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -103,89 +103,89 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: 
[[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -209,69 +209,69 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
[[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX8: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], 
%subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -293,89 +293,89 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; 
GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; 
GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[PRED_COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -399,109 +399,109 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX6-NEXT: %19:vgpr_32, dead %21:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %19, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-NEXT: %19:vgpr_32, dead %21:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %19, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX8-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[PRED_COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] 
%0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -525,61 +525,61 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: 
FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX10-NEXT: 
GLOBAL_ATOMIC_CMPSWAP [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -600,61 +600,61 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[PRED_COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - 
; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: 
GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0_sub1, [[PRED_COPY2]], %subreg.sub2_sub3 + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[PRED_COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -675,71 +675,71 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], 
%subreg.sub2_sub3 + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX9: liveins: 
$sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -761,91 +761,91 @@ body: | ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX6: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], 
%subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GFX7-FLAT-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX7-FLAT-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; 
GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc + ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY5]], [[PRED_COPY6]], implicit-def $scc, implicit $scc ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY5]], [[PRED_COPY6]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[PRED_COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], 
%subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[PRED_COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir index 3c7eb559d3108..a3790427d6317 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: ffbh_u32_s32_s_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_AMDGPU_FFBH_U32 %0 @@ -38,8 +38,8 @@ body: | ; CHECK-LABEL: name: ffbh_u32_s32_v_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 @@ -61,8 +61,8 @@ body: | ; CHECK-LABEL: name: ffbh_u32_v_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir index c340f1e84eefd..aa60891d6ce39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbl-b32.mir @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: ffbl_b32_s32_s_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_AMDGPU_FFBL_B32 %0 @@ -38,8 +38,8 
@@ body: | ; CHECK-LABEL: name: ffbl_b32_s32_v_v ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 @@ -61,8 +61,8 @@ body: | ; CHECK-LABEL: name: ffbl_b32_v_s ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_AMDGPU_FFBL_B32 %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index e9caeb54c5666..b91a2e3ba2d3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -17,21 +17,21 @@ body: | ; WAVE64-LABEL: name: and_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 
[[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -57,16 +57,16 @@ body: | ; WAVE64-LABEL: name: and_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -89,16 +89,16 @@ body: | ; WAVE64-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -121,16 +121,16 @@ body: | ; WAVE64-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -153,16 +153,16 @@ body: | ; WAVE64-LABEL: name: and_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -183,16 +183,16 @@ body: | ; WAVE64-LABEL: name: and_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -213,16 +213,16 @@ body: | ; WAVE64-LABEL: name: and_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 
[[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -243,16 +243,16 @@ body: | ; WAVE64-LABEL: name: and_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -273,16 +273,16 @@ body: | ; WAVE64-LABEL: name: and_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -303,16 +303,16 @@ body: | ; WAVE64-LABEL: name: and_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -333,16 +333,16 @@ body: | ; WAVE64-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -395,22 +395,22 @@ body: | ; WAVE64-LABEL: name: and_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: 
[[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] ; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] @@ -440,27 +440,27 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_AND_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 
1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_AND_B32_1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -488,9 +488,9 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -499,15 +499,15 @@ body: | ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_AND_B32_1]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -535,16 +535,16 @@ body: | ; WAVE64-LABEL: name: and_s32_sgpr_sgpr_sgpr_result_reg_class ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr_result_reg_class ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir index 938423cd89c0b..5ac8e3dc46b22 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -15,8 +15,8 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -36,9 +36,9 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_ANYEXT %0 @@ -58,9 +58,9 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -81,9 +81,9 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s64) = G_ANYEXT %0 @@ -103,9 +103,9 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -142,9 +142,9 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[PRED_COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_ANYEXT %1 @@ -164,8 +164,8 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -184,10 +184,10 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_ANYEXT %1 @@ -206,9 +206,9 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_ANYEXT %1 @@ -228,8 +228,8 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -248,8 +248,8 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_vgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT 
%1 @@ -268,8 +268,8 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -291,8 +291,8 @@ body: | ; GCN-LABEL: name: anyext_regclass_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir index 11a9384f90868..aff18dec488e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -17,37 +17,37 @@ body: | ; GFX6-LABEL: name: ashr_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX7-LABEL: name: ashr_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX8-LABEL: name: ashr_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX9-LABEL: name: ashr_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] ; GFX10-LABEL: name: ashr_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ body: | ; GFX6-LABEL: name: ashr_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], 
implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ body: | ; GFX6-LABEL: name: ashr_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ body: | ; GFX6-LABEL: name: ashr_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = 
V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX7-LABEL: name: ashr_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX8-LABEL: name: ashr_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX9-LABEL: name: ashr_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] ; GFX10-LABEL: name: ashr_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ body: | ; GFX6-LABEL: name: ashr_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX7-LABEL: name: ashr_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, 
$sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX8-LABEL: name: ashr_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX9-LABEL: name: ashr_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] ; GFX10-LABEL: name: ashr_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ASHR_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ body: | ; GFX6-LABEL: name: ashr_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 
[[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ body: | ; GFX6-LABEL: name: ashr_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: 
[[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ body: | ; GFX6-LABEL: name: ashr_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX7-LABEL: name: ashr_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_ASHR_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHR_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_ASHR_I64_e64_]] ; GFX8-LABEL: name: ashr_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec 
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX9-LABEL: name: ashr_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] ; GFX10-LABEL: name: ashr_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index 4ff4c919381fa..09677acbd095a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -85,30 +85,30 @@ body: | ; GFX8-LABEL: name: ashr_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit 
[[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -178,30 +178,30 @@ body: | ; GFX8-LABEL: name: ashr_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -223,31 +223,31 @@ body: | ; GFX8-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: 
[[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -423,30 +423,30 @@ body: | ; GFX8-LABEL: name: ashr_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX11-LABEL: name: ashr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir index d40aa14d73a3b..639027fd80b2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir @@ -79,16 +79,16 @@ body: | ; GFX9-LABEL: name: ashr_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ body: | ; GFX9-LABEL: name: ashr_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: 
[[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ body: | ; GFX9-LABEL: name: ashr_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] ; GFX10-LABEL: name: ashr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_ASHRREV_I16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir index 5132b76f96453..69dee063caa5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir @@ -19,37 +19,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s32_local ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 
0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_local ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -70,39 +70,39 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 
4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -125,37 +125,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s64_local ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = 
DS_CMPST_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_local ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -176,37 +176,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_gfx9_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64_gfx9 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir index c69abab500ab8..f4a9cade3a32b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir @@ -19,37 +19,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s32_region ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_region ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -70,39 +70,39 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_CMPST_RTN_B32_]] ; GFX11-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[COPY]], [[COPY2]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX11-NEXT: $vgpr0 = COPY [[DS_CMPSTORE_RTN_B32_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPSTORE_RTN_B32 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; 
GFX11-NEXT: $vgpr0 = PRED_COPY [[DS_CMPSTORE_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -125,37 +125,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s64_region ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_region ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: 
[[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -176,37 +176,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX6-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX7-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX9-NEXT: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPST_RTN_B64_]] ; GFX11-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX11: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[COPY]], [[COPY2]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[DS_CMPSTORE_RTN_B64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[DS_CMPSTORE_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPSTORE_RTN_B64 [[PRED_COPY]], [[PRED_COPY2]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_CMPSTORE_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir index 782cc5553881e..c11ce4dd41ac7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -17,31 +17,31 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32 ; GFX10: liveins: 
$vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -61,27 +61,27 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, 
implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -100,51 +100,51 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -166,47 +166,47 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, 
implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -227,51 
+227,51 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX10: liveins: 
$vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -293,47 +293,47 @@ 
body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -354,51 +354,51 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -420,47 +420,47 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -481,71 +481,71 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit 
$exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -567,67 +567,67 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; 
GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -648,31 +648,31 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -692,27 +692,27 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -731,51 +731,51 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; 
GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -797,47 +797,47 @@ body: | ; GFX7-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 
[[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY 
$vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir index 930d2789f2ea9..34407d27065ce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -18,43 +18,43 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} 
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -74,38 +74,38 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -124,53 +124,53 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 
(s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -192,48 +192,48 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -254,63 +254,63 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX6: liveins: 
$vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -332,58 +332,58 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = 
V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -404,63 +404,63 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, 
[[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -482,58 +482,58 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE 
[[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD 
[[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -554,84 +554,84 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; 
GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], 
%subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], 
%subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load 
store seq_cst (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -653,79 +653,79 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 
0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -746,43 +746,43 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 
1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -802,38 +802,38 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -852,63 +852,63 @@ body: 
| ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] + ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; 
GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[PRED_COPY]], [[PRED_COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -930,58 +930,58 @@ body: | ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], 
killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir index 38ec2fd4c7ef4..375958cf86f11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -20,18 +20,18 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_local ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: 
(load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -39,7 +39,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) @@ -59,16 +59,16 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_ADD_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_ADD_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -94,18 +94,18 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], 
[[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -115,7 +115,7 @@ body: | ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir index a41c83ce60953..288b5f5b0f99c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -20,18 +20,18 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_region ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -39,7 +39,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 
%1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) @@ -59,16 +59,16 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_ADD_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -94,18 +94,18 @@ body: | ; GFX8-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX8-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} @@ -115,7 +115,7 @@ body: | ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; 
GFX6-NEXT: $vgpr0 = PRED_COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir index 1208ff59009ce..b79981bb841ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -19,26 +19,26 @@ body: | ; GFX6-LABEL: name: atomicrmw_xchg_s32_local ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) @@ -58,28 +58,28 @@ body: | ; GFX6-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir index 750465a0a6dbb..74d9728fcbe03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir @@ -19,26 +19,26 @@ body: | ; GFX6-LABEL: name: atomicrmw_xchg_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: 
[[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) @@ -58,28 +58,28 @@ body: | ; GFX6-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GFX6-NEXT: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX6-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[V_ADD_CO_U32_e64_]], [[PRED_COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX7-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[PRED_COPY]], [[PRED_COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir index 6703256c15646..90a252ce87f05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitcast.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN --- @@ -7,13 +8,15 @@ legalized: true regBankSelected: true tracksRegLiveness: true -# GCN-LABEL: name: bitcast -# GCN: [[A:%[0-9]+]]:vgpr_32 = COPY $vgpr0 -# GCN: S_ENDPGM 0, implicit [[A]] body: | bb.0: liveins: $vgpr0 + ; GCN-LABEL: name: bitcast + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_BITCAST %0 %2:vgpr(s32) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir index 9bbf23efd865e..e67a521bb77a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bitreverse.mir @@ -12,8 +12,8 @@ body: | ; CHECK-LABEL: name: bitreverse_i32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_BREV_B32_:%[0-9]+]]:sreg_32 = S_BREV_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_BITREVERSE %0 @@ -31,8 +31,8 @@ body: | ; CHECK-LABEL: name: bitreverse_i32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_BITREVERSE %0 @@ -50,8 +50,8 @@ body: | ; CHECK-LABEL: name: bitreverse_i32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_BITREVERSE %0 @@ -69,8 +69,8 @@ body: | ; CHECK-LABEL: name: bitreverse_i64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_BITREVERSE %0 @@ -88,11 +88,11 @@ body: | ; CHECK-LABEL: name: bitreverse_i64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -114,11 +114,11 @@ body: | ; CHECK-LABEL: name: bitreverse_i64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY2]], implicit $exec - ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[V_BFREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY2]], implicit $exec + ; CHECK-NEXT: [[V_BFREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e64 [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_BFREV_B32_e64_]], %subreg.sub0, [[V_BFREV_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: 
S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir index cefca22aaee35..6510c5f9fd765 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -19,11 +19,11 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -52,7 +52,7 @@ body: | ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: $scc = COPY [[DEF]] + ; GCN-NEXT: $scc = PRED_COPY [[DEF]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -78,11 +78,11 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} @@ -118,10 +118,10 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: $vcc = COPY [[V_CMP_EQ_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: $vcc = PRED_COPY [[V_CMP_EQ_U32_e64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -207,10 +207,10 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: $vcc = COPY [[V_CMP_CLASS_F32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: $vcc = PRED_COPY [[V_CMP_CLASS_F32_e64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -238,14 +238,14 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: %5:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY3]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_F32_e64_]], implicit-def dead $scc + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -277,15 +277,15 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY2]], implicit-def $scc ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_1]] + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -317,13 +317,13 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit 
$exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1 ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc - ; GCN-NEXT: $vcc = COPY [[S_AND_B64_]] + ; GCN-NEXT: $vcc = PRED_COPY [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir index 4e2482078d860..15687a301ca02 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir @@ -13,18 +13,18 @@ body: | ; GFX7-LABEL: name: bswap_i32_vv ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 8, implicit $exec - ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 24, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY]], 8, implicit $exec + ; GFX7-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY]], 24, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935 ; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]] ; GFX8-LABEL: name: bswap_i32_vv ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051 - ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec + ; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[PRED_COPY]], [[S_MOV_B32_]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_BSWAP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir index 0af2d08ab9ce8..0c3e371ebeae4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -15,9 +15,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], 
[[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -38,9 +38,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -64,18 +64,18 @@ body: | ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hl ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[PRED_COPY]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hl ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY1]], [[COPY]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[PRED_COPY1]], [[PRED_COPY]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -98,9 +98,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -125,9 +125,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 
[[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -148,9 +148,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -171,8 +171,8 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 @@ -192,9 +192,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF @@ -216,8 +216,8 @@ body: | ; GFX9PLUS: liveins: $sgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_IMPLICIT_DEF %1:sgpr(s32) = COPY $sgpr1 @@ -238,8 +238,8 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_undef ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 @@ -260,8 +260,8 @@ body: | ; GFX9PLUS: liveins: $sgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = 
S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr1 @@ -282,9 +282,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_zero ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -305,8 +305,8 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_lshr16_zero ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], 16, implicit-def $scc ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr0 @@ -330,11 +330,11 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh_multi_use ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -357,11 +357,11 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_lhs ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[COPY1]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[PRED_COPY1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -385,21 +385,21 @@ body: | ; 
GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_1]] ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_PACK_HL_B32_B16_]], implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -423,11 +423,11 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_lh_wrong_shift_amt ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_LSHR_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -450,11 +450,11 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_pack_hh_wrong_shift_amt ; GFX9PLUS: liveins: $sgpr0, $sgpr1 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 - ; GFX9PLUS-NEXT: 
[[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9PLUS-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9PLUS-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 @@ -659,9 +659,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_var_constant ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 456 @@ -683,8 +683,8 @@ body: | ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 456 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 456 %1:sgpr(s32) = COPY $sgpr0 @@ -705,9 +705,9 @@ body: | ; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_var_0 ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[PRED_COPY]], [[S_MOV_B32_]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -729,8 +729,8 @@ body: | ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9PLUS-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[PRED_COPY]] ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir index f4531bc83877d..ac43b14afa504 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: 
test_build_vector_v_v2s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: test_build_vector_v_v2s32_s_s32_v_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -60,9 +60,9 @@ body: | ; GCN-LABEL: name: test_build_vector_v_v2s32_v_s32_s_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -83,9 +83,9 @@ body: | ; GCN-LABEL: name: test_build_vector_s_v2s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -106,10 +106,10 @@ body: | ; GCN-LABEL: name: test_build_vector_s_v2s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] 
%0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %4:sgpr(<2 x s64>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir index 355ffd1456dc3..86b9960f7fba2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -14,10 +14,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v4s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -36,10 +36,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v4s16_s_v2s16_v_v2s16 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -58,10 +58,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v4s16_v_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 %2:vgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -80,10 +80,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -102,11 +102,11 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_s96_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -126,11 +126,11 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_s96_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -150,12 +150,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s16_s_v2s16_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -176,12 +176,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v8s16_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: 
$vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -202,10 +202,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 %2:sgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -224,10 +224,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v8s16_v_v4s16_v_v4s16 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 %2:vgpr(<8 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -246,13 +246,13 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_s160_s_v2s16_s_v2s16_s_v2s16_s_v2s16_s_v2s16 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, 
[[COPY4]], %subreg.sub4 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = COPY $sgpr2 @@ -274,13 +274,13 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_s160_v_v2s16_v_v2s16_v_v2s16_v_v2s16_v_v2s16 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = COPY $vgpr2 @@ -302,10 +302,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -326,10 +326,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v12s16_v_v4s16_v_v4s16_v_v4s16 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -350,12 +350,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v16s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 %2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 @@ -376,10 +376,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v8s16_s_v8s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<8 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<8 x s16>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<16 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -398,10 +398,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v32s16_s_v12s16_s_v12s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<16 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<16 x s16>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<32 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -420,16 +420,16 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v32s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16_s_v4s16 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $sgpr12_sgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr12_sgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 
%2:sgpr(<4 x s16>) = COPY $sgpr4_sgpr5 @@ -454,16 +454,16 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v512_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64_v_v64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 %2:vgpr(<4 x s16>) = COPY $vgpr4_vgpr5 @@ -493,10 +493,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4s32_s_v2s32_s_v2s32 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 %4:sgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -515,10 +515,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v4s32_v_v2s32_v_v2s32 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 %2:vgpr(<4 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -537,12 +537,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s32_s_v2s32_s_v2s32_s_v2s32_s_v2s32 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 %2:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 @@ -564,10 +564,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s32_s_v4s32_s_v4s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<8 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -586,10 +586,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v16s32_s_v8s32_s_v8s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; 
GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<8 x s32>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<16 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -608,16 +608,16 @@ body: | ; GCN-LABEL: name: test_concat_vectors_v_v16s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32_v_v2s32 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 
%2:vgpr(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -642,10 +642,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1 @@ -664,10 +664,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %3:sgpr(<4 x s64>) = G_CONCAT_VECTORS %0, %1 @@ -708,10 
+708,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v4s64_s_v4s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1 @@ -730,12 +730,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v2s64_s_v2s64_s_v2s64_s_v2s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7, [[COPY2]], %subreg.sub8_sub9_sub10_sub11, [[COPY3]], %subreg.sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7, [[PRED_COPY2]], %subreg.sub8_sub9_sub10_sub11, [[PRED_COPY3]], %subreg.sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x s64>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(<2 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -756,10 +756,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1 @@ -778,10 +778,10 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4p3_s_v2p3_s_v2p3 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %2:sgpr(<4 x p3>) = G_CONCAT_VECTORS %0, %1 @@ -800,12 +800,12 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8p3_s_v2p3_s_v2p3_v2p3_s_v2p3 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 %2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index 7375475d65714..517bc58a12ff8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -17,17 +17,17 @@ body: | ; WAVE64-LABEL: name: copy ; WAVE64: liveins: 
$sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy ; WAVE32: liveins: $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF @@ -46,25 +46,25 @@ body: | ; WAVE64-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = 
PRED_COPY $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -86,30 +86,30 @@ body: | ; WAVE64-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]] - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], 
[[COPY4]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY4]], implicit $exec + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY3]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -134,21 +134,21 @@ body: | ; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64_xexec = PRED_COPY $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY $scc + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -169,13 +169,13 @@ body: | ; WAVE64-LABEL: name: copy_sgpr_no_type ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32_xm0 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_sgpr_no_type ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32_xm0 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_32_xm0 = COPY $sgpr0 %1:sreg_32_xm0 = COPY %0 S_ENDPGM 0, implicit %1 @@ -195,13 +195,13 @@ body: | ; WAVE64-LABEL: name: copy_vgpr_no_type ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vgpr_no_type ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY %0 S_ENDPGM 0, implicit %1 @@ -221,13 +221,13 @@ body: | ; WAVE64-LABEL: name: copy_maybe_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_maybe_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_64_xexec = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = COPY %0 S_ENDPGM 0, implicit %1 @@ -249,15 +249,15 @@ body: | ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[PRED_COPY]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0_xexec = 
PRED_COPY [[PRED_COPY]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s32) = COPY $sgpr0 %1:vcc(s1) = G_TRUNC %0 %2:vcc(s1) = COPY %1 @@ -278,14 +278,14 @@ body: | ; WAVE64-LABEL: name: copy_s64_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: $vcc = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: $vcc = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc ; WAVE32-LABEL: name: copy_s64_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: $vcc = COPY [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: $vcc = PRED_COPY [[PRED_COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo %0:sgpr(s64) = COPY $sgpr0_sgpr1 $vcc = COPY %0 @@ -306,14 +306,14 @@ body: | ; WAVE64-LABEL: name: copy_s32_to_vcc_lo ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: $vcc_lo = COPY [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: $vcc_lo = PRED_COPY [[PRED_COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc ; WAVE32-LABEL: name: copy_s32_to_vcc_lo ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: $vcc_lo = COPY [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: $vcc_lo = PRED_COPY [[PRED_COPY]] ; WAVE32-NEXT: S_ENDPGM 0, implicit $vcc_lo %0:sgpr(s32) = COPY $sgpr0 $vcc_lo = COPY %0 @@ -334,13 +334,13 @@ body: | ; WAVE64-LABEL: name: copy_vcc_to_s64 ; WAVE64: liveins: $vcc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $vcc + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vcc_to_s64 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $vcc + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s64) = COPY $vcc S_ENDPGM 0, implicit %0 @@ -359,13 +359,13 @@ body: | ; WAVE64-LABEL: name: copy_vcc_lo_to_s32 ; WAVE64: liveins: $vcc ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $vcc_lo + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] ; WAVE32-LABEL: name: copy_vcc_lo_to_s32 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $vcc_lo + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $vcc_lo S_ENDPGM 0, implicit %0 @@ -384,17 +384,17 @@ body: | ; WAVE64-LABEL: name: copy_s1_to_vcc ; WAVE64: liveins: $sgpr0_sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY [[PRED_COPY]].sub0 + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] ; WAVE32-LABEL: name: copy_s1_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir index c2c50e972df29..f7090f9f308f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir @@ -14,8 +14,8 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -35,8 +35,8 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 @@ -77,8 +77,8 @@ body: | ; CHECK-LABEL: name: ctlz_zero_undef_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir index 2509f08f9aaf6..4a5faa299241f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -14,8 +14,8 @@ body: | ; CHECK-LABEL: name: ctpop_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[PRED_COPY]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTPOP %0 @@ -35,8 +35,8 @@ body: | ; CHECK-LABEL: name: ctpop_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTPOP %0 @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: ctpop_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTPOP %0 @@ -77,9 +77,9 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_v_vv_commute0 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -101,9 +101,9 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_v_vv_commute1 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -126,10 +126,10 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_s_ss_commute0 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[PRED_COPY]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -151,9 +151,9 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_v_vs_commute0 ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,9 +176,9 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_v_sv_commute0 ; CHECK: liveins: $vgpr0, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -201,9 +201,9 @@ body: | ; CHECK-LABEL: name: add_ctpop_s32_s_sv_commute0 ; CHECK: liveins: $sgpr0, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -225,8 +225,8 @@ body: | ; CHECK-LABEL: name: ctpop_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_BCNT1_I32_B64_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 [[COPY]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_BCNT1_I32_B64_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 [[PRED_COPY]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BCNT1_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTPOP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir index da612c92fea6a..9a7eadf02b97e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir @@ -14,8 +14,8 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_ss ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -35,8 +35,8 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[PRED_COPY]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 @@ -77,8 +77,8 @@ body: | ; CHECK-LABEL: name: cttz_zero_undef_s64_ss ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_FF1_I32_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir index 649ccad17bdea..85081173fe959 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -18,18 +18,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v2s32 ; MOVREL: liveins: $sgpr0_sgpr1, $sgpr2 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: 
extract_vector_elt_s_s32_v2s32 ; GPRIDX: liveins: $sgpr0_sgpr1, $sgpr2 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -49,18 +49,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v3s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v3s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr2 @@ -80,18 +80,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY 
[[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -111,18 +111,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -142,18 +142,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v16s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v16s32 ; 
GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -173,18 +173,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -204,18 +204,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v2s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v2s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -235,18 +235,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v4s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v4s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; 
GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -266,18 +266,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -297,18 +297,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -328,18 +328,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit 
[[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -361,22 +361,22 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -398,18 +398,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; MOVREL: 
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub7, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub7, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -431,22 +431,22 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -468,18 +468,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub2_sub3, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub2_sub3, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -501,18 +501,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit 
[[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub4_sub5, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: $m0 = COPY [[COPY1]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub4_sub5, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -534,22 +534,22 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[PRED_COPY]].sub0_sub1, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -571,17 +571,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v2s32 ; MOVREL: liveins: $vgpr0_vgpr1, $sgpr2 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32 ; GPRIDX: liveins: $vgpr0_vgpr1, $sgpr2 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -601,17 +601,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v3s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit 
[[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:sgpr(s32) = COPY $sgpr2 @@ -631,17 +631,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -661,17 +661,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -691,17 +691,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v16s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:sgpr(s32) = COPY $sgpr8 @@ -721,17 +721,17 @@ 
body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v32s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_1024 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_1024 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_]] %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -751,17 +751,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: 
[[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub1, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -783,21 +783,21 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: 
[[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -819,17 +819,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: $m0 = COPY [[COPY1]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY1]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub7, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -851,21 +851,21 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[PRED_COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -887,18 +887,18 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[PRED_COPY]].sub0, implicit $m0, implicit [[PRED_COPY]] ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = G_CONSTANT i32 0 @@ -918,17 +918,17 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[PRED_COPY]].sub0, implicit $m0, implicit $exec, implicit [[PRED_COPY]] ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[PRED_COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index 28836016640cc..3a902892ae3e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -9,38 +9,38 @@ body: | bb.0: ; CHECK-LABEL: name: extract512 ; CHECK: [[DEF:%[0-9]+]]:sgpr_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub15 - ; CHECK-NEXT: $sgpr0 = COPY [[COPY]] - ; CHECK-NEXT: $sgpr1 = COPY [[COPY1]] - ; CHECK-NEXT: $sgpr2 = COPY [[COPY2]] - ; CHECK-NEXT: $sgpr3 = COPY [[COPY3]] - ; CHECK-NEXT: $sgpr4 = COPY [[COPY4]] - ; CHECK-NEXT: $sgpr5 = COPY [[COPY5]] - ; CHECK-NEXT: $sgpr6 = COPY [[COPY6]] - ; CHECK-NEXT: $sgpr7 = COPY [[COPY7]] - ; CHECK-NEXT: $sgpr8 = COPY [[COPY8]] - ; CHECK-NEXT: $sgpr9 = COPY [[COPY9]] - ; CHECK-NEXT: $sgpr10 = COPY [[COPY10]] - ; CHECK-NEXT: $sgpr11 = COPY [[COPY11]] - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]] - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]] - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]] - ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 
+ ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub15 + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $sgpr8 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $sgpr9 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $sgpr10 = PRED_COPY [[PRED_COPY10]] + ; CHECK-NEXT: $sgpr11 = PRED_COPY [[PRED_COPY11]] + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]] + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]] + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]] + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 0 @@ -87,39 +87,39 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s_s32_s1024 ; CHECK: [[DEF:%[0-9]+]]:sgpr_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub19 - ; CHECK-NEXT: 
[[COPY20:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub30 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]], implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]], implicit [[COPY10]], implicit [[COPY11]], implicit [[COPY12]], implicit [[COPY13]], implicit [[COPY14]], implicit [[COPY15]], implicit [[COPY16]], implicit [[COPY17]], implicit [[COPY18]], implicit [[COPY19]], implicit [[COPY20]], implicit [[COPY21]], implicit [[COPY22]], implicit [[COPY23]], implicit [[COPY24]], implicit [[COPY25]], implicit [[COPY26]], implicit [[COPY27]], implicit [[COPY28]], implicit [[COPY29]], implicit [[COPY30]], implicit [[COPY31]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub27 + ; CHECK-NEXT: 
[[PRED_COPY28:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub30 + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]], implicit [[PRED_COPY8]], implicit [[PRED_COPY9]], implicit [[PRED_COPY10]], implicit [[PRED_COPY11]], implicit [[PRED_COPY12]], implicit [[PRED_COPY13]], implicit [[PRED_COPY14]], implicit [[PRED_COPY15]], implicit [[PRED_COPY16]], implicit [[PRED_COPY17]], implicit [[PRED_COPY18]], implicit [[PRED_COPY19]], implicit [[PRED_COPY20]], implicit [[PRED_COPY21]], implicit [[PRED_COPY22]], implicit [[PRED_COPY23]], implicit [[PRED_COPY24]], implicit [[PRED_COPY25]], implicit [[PRED_COPY26]], implicit [[PRED_COPY27]], implicit [[PRED_COPY28]], implicit [[PRED_COPY29]], implicit [[PRED_COPY30]], implicit [[PRED_COPY31]] %0:sgpr(s1024) = G_IMPLICIT_DEF %1:sgpr(s32) = G_EXTRACT %0:sgpr, 0 %2:sgpr(s32) = G_EXTRACT %0:sgpr, 32 @@ -168,9 +168,9 @@ body: | bb.0: ; CHECK-LABEL: name: extract_sgpr_s64_from_s128 ; CHECK: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub2_sub3 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sgpr(s128) = G_IMPLICIT_DEF %1:sgpr(s64) = G_EXTRACT %0, 0 %2:sgpr(s64) = G_EXTRACT %0, 64 @@ -189,11 +189,11 @@ body: | ; CHECK-LABEL: name: extract_sgpr_s96_from_s128 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY1]].sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY]].sub1_sub2_sub3 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = G_EXTRACT %0, 0 %2:sgpr(s96) = G_EXTRACT %0, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir index e5ba07bbdad8d..6f385443a3c03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -23,31 +23,31 @@ body: | ; SI-LABEL: name: fabs_s32_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 $sgpr0 = COPY %1 @@ -71,31 +71,31 @@ body: | ; SI-LABEL: name: fabs_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 
[[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -120,25 +120,25 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; VI-LABEL: name: fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; GFX9-LABEL: name: fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) ; GFX10-LABEL: name: fabs_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FABS]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FABS %0 $vgpr0 = COPY %1 @@ -162,31 +162,31 @@ body: | ; SI-LABEL: name: fabs_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], 
implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 $sgpr0 = COPY %1 @@ -210,31 +210,31 @@ body: | ; SI-LABEL: name: fabs_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; VI-LABEL: name: fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX9-LABEL: name: fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] ; GFX10-LABEL: name: fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -260,31 +260,31 @@ body: | ; SI-LABEL: name: fabs_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], 
implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -315,32 +315,32 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr0 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FABS]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -366,31 +366,31 @@ body: | ; SI-LABEL: name: fabs_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; VI-LABEL: name: fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX9-LABEL: name: fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] ; GFX10-LABEL: name: fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -415,25 +415,25 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; VI-LABEL: name: fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; GFX9-LABEL: name: fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) ; GFX10-LABEL: name: fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FABS]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 $vgpr0 = COPY %1 @@ -460,42 +460,42 @@ body: | ; SI-LABEL: name: fabs_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], 
[[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 @@ -523,42 +523,42 @@ body: | ; SI-LABEL: name: fabs_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 @@ -694,41 +694,41 @@ body: | ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} ; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fabs_s64_ss_no_src_constraint ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[DEF]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = IMPLICIT_DEF %1:sgpr(s64) = G_FABS %0:sgpr(s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir index f477636812a34..8ed58e505f9b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir @@ -13,10 +13,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -38,10 +38,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vsv ; GFX8: 
liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 @@ -63,10 +63,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvs ; GFX8: liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -88,10 +88,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fabs_lhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -114,10 +114,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fabs_rhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 2, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -140,10 +140,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_lhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 3, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -167,10 +167,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_rhs ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 3, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -194,10 +194,10 @@ body: | ; GFX8-LABEL: name: fadd_s16_fneg_copy_sgpr ; GFX8: liveins: $vgpr0, $sgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: %5:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %5 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir index d69eb116fc652..ff4f86b3e0684 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir @@ -13,10 +13,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FADD %0, %1 @@ -36,10 +36,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_vsv ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_FADD %0, %1 @@ -59,10 +59,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvs ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_FADD %0, %1 @@ -82,10 +82,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fabs_lhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %0 @@ -106,9 +106,9 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fabs_rhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %1 @@ -129,10 +129,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_lhs ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %0 @@ -154,9 +154,9 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_rhs ; GFX6: liveins: $vgpr0, 
$vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_FABS %1 @@ -179,11 +179,11 @@ body: | ; GFX6-LABEL: name: fadd_s32_fneg_copy_sgpr ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:sgpr(s32) = G_FNEG %1 @@ -207,10 +207,10 @@ body: | ; GFX6-LABEL: name: fadd_s32_copy_fneg_copy_fabs ; GFX6: liveins: $vgpr0, $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:sgpr(s32) = G_FABS %1 @@ -238,12 +238,12 @@ body: | ; GFX6-LABEL: name: fadd_s32_copy_fabs_sgpr_copy_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 2, [[PRED_COPY2]], 2, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FABS %0 @@ -267,12 +267,12 @@ body: | ; GFX6-LABEL: name: fadd_s32_copy_fneg_sgpr_copy_fneg_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - 
; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vgpr_32 = nofpexcept V_ADD_F32_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 1, [[PRED_COPY2]], 1, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FNEG %0 @@ -296,12 +296,12 @@ body: | ; GFX6-LABEL: name: fadd_s32_copy_fneg_fabs_sgpr_copy_fneg_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 3, [[PRED_COPY2]], 3, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir index 94605b231246a..4923cd37a1ebe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir @@ -13,10 +13,10 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FADD %0, %1 @@ -36,10 +36,10 @@ body: | ; GFX6-LABEL: name: fadd_s64_vsv ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; 
GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = G_FADD %0, %1 @@ -59,10 +59,10 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvs ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: %2:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 %2:vgpr(s64) = G_FADD %0, %1 @@ -82,10 +82,10 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fabs_lhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %0 @@ -106,9 +106,9 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fabs_rhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %3:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %1 @@ -129,10 +129,10 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_lhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[PRED_COPY]], 0, 
[[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_FABS %0 @@ -154,9 +154,9 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_rhs ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = G_FABS %1 @@ -180,11 +180,11 @@ body: | ; GFX6-LABEL: name: fadd_s64_fneg_copy_sgpr ; GFX6: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %4:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 %2:sgpr(s64) = G_FNEG %1 @@ -210,12 +210,12 @@ body: | ; GFX6-LABEL: name: fadd_s64_copy_fabs_sgpr_copy_fabs_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 2, [[PRED_COPY2]], 2, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FABS %0 @@ -239,12 +239,12 @@ body: | ; GFX6-LABEL: name: fadd_s64_copy_fneg_sgpr_copy_fneg_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %6:vreg_64 = nofpexcept V_ADD_F64_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, 
implicit %6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 1, [[PRED_COPY2]], 1, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FNEG %0 @@ -268,12 +268,12 @@ body: | ; GFX6-LABEL: name: fadd_s64_copy_fneg_fabs_sgpr_copy_fneg_fabs_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6-NEXT: %8:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %8 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY1]] + ; GFX6-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 3, [[PRED_COPY2]], 3, [[PRED_COPY3]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index b72c4d64d9e0c..471c8631634f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -20,27 +20,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f16_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX10-LABEL: name: fcanonicalize_f16_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fcanonicalize_f16_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -63,27 +63,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f16_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX10-LABEL: name: fcanonicalize_f16_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fcanonicalize_f16_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -107,27 +107,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept 
V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -150,27 +150,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, 
[[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -193,27 +193,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_v2f16_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX10-LABEL: name: fcanonicalize_v2f16_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX11-LABEL: name: fcanonicalize_v2f16_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -236,27 +236,27 @@ body: | ; GFX8-LABEL: 
name: fcanonicalize_v2f16_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX10-LABEL: name: fcanonicalize_v2f16_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX11-LABEL: name: fcanonicalize_v2f16_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -279,27 +279,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f64_denorm ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX9-LABEL: name: fcanonicalize_f64_denorm ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, 
implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX10-LABEL: name: fcanonicalize_f64_denorm ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX11-LABEL: name: fcanonicalize_f64_denorm ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -322,27 +322,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f64_flush ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]] ; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX10-LABEL: name: fcanonicalize_f64_flush ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] ; GFX11-LABEL: name: fcanonicalize_f64_flush ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX11-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -364,27 +364,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -408,27 +408,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, 
implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -451,27 +451,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, 
[[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -494,27 +494,27 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -537,35 +537,35 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], 
[[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 @@ -589,35 +589,35 @@ body: | ; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, 
implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %3 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX8-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX11-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir index be4292399270e..c8e328d64879b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: fceil_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCEIL %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: fceil_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FCEIL %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ body: | ; CHECK-LABEL: name: fceil_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_CEIL_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FCEIL %0 $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ body: | ; CHECK-LABEL: name: fceil_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_CEIL_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_CEIL_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCEIL %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir index d9ba03f95a1cf..6c91ba9bf3020 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir @@ -17,8 +17,8 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GCN-NEXT: [[FCEIL:%[0-9]+]]:sreg_32(s16) = G_FCEIL [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FCEIL]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FCEIL]](s16) + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FCEIL %1 @@ -39,9 +39,9 @@ body: | ; GCN-LABEL: name: fceil_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit 
$mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCEIL %1 @@ -62,9 +62,9 @@ body: | ; GCN-LABEL: name: fceil_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FCEIL %1 @@ -85,9 +85,9 @@ body: | ; GCN-LABEL: name: fceil_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CEIL_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir index 08aa5be48f282..0d8d7e7b61f09 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir @@ -42,17 +42,17 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 @@ -70,17 +70,17 @@ body: | ; WAVE64-LABEL: name: 
fcmp_ogt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 @@ -98,17 +98,17 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 @@ -126,17 +126,17 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 @@ -154,17 +154,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 @@ -182,17 +182,17 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] ; WAVE32-LABEL: name: 
fcmp_one_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 @@ -210,17 +210,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 @@ -238,17 +238,17 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 @@ -266,17 +266,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 @@ -294,17 +294,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] %0:vgpr(s32) = 
COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 @@ -322,17 +322,17 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 @@ -350,17 +350,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 @@ -378,17 +378,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = 
nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 @@ -406,17 +406,17 @@ body: | ; WAVE64-LABEL: name: fcmp_une_s32_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_une_s32_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 @@ -490,17 +490,17 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: 
[[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1 @@ -518,17 +518,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ogt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1 @@ -546,17 +546,17 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] 
; WAVE32-LABEL: name: fcmp_oge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1 @@ -574,17 +574,17 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1 @@ -602,17 +602,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1 @@ -630,17 +630,17 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_one_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(one), %0, %1 @@ -658,17 +658,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_O_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, 
implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_O_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1 @@ -686,17 +686,17 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1 @@ -714,17 +714,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: 
[[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1 @@ -742,17 +742,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1 @@ -770,17 +770,17 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit 
[[V_CMP_NLT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1 @@ -798,17 +798,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1 @@ -826,17 +826,17 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1 @@ -854,17 +854,17 @@ body: | ; WAVE64-LABEL: name: 
fcmp_une_s64_vv ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE64-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_une_s64_vv ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; WAVE32-NEXT: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vcc(s1) = G_FCMP floatpred(une), %0, %1 @@ -910,18 +910,18 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %2:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %2:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], %2, implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; 
WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir index ac60a5eb146e7..970125278db94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -57,24 +57,24 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] ; GFX11-LABEL: name: fcmp_oeq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -94,24 +94,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ogt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s16_vv ; WAVE32: liveins: 
$vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] ; GFX11-LABEL: name: fcmp_ogt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_GT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -131,24 +131,24 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] ; GFX11-LABEL: name: fcmp_oge_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: 
[[V_CMP_GE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -168,24 +168,24 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] ; GFX11-LABEL: name: fcmp_olt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -205,24 +205,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ole_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -241,24 +241,24 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_one_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; GFX11-LABEL: name: fcmp_one_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, 
implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -278,24 +278,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; GFX11-LABEL: name: fcmp_ord_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -315,24 +315,24 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_U_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, 
implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] ; GFX11-LABEL: name: fcmp_uno_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_U_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -352,24 +352,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] ; GFX11-LABEL: name: fcmp_ueq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLG_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC 
%0 @@ -389,24 +389,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ugt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -426,24 +426,24 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] ; GFX11-LABEL: name: fcmp_uge_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NLT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -463,24 +463,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] ; GFX11-LABEL: name: fcmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -500,24 +500,24 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} 
- ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] ; GFX11-LABEL: name: fcmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NGT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -537,24 +537,24 @@ body: | ; WAVE64-LABEL: name: fcmp_une_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: %4:sreg_64_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_une_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: S_ENDPGM 0, implicit %4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept 
V_CMP_NEQ_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] ; GFX11-LABEL: name: fcmp_une_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NEQ_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -605,4 +605,3 @@ body: | %4:vcc(s1) = G_FCMP floatpred(true), %2, %3 S_ENDPGM 0, implicit %4 ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir index 23b10218cbbe8..d5e10b1e481dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir @@ -14,8 +14,8 @@ body: | ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s32) = G_FCONSTANT float 1.0 %1:vgpr(s32) = G_FCONSTANT float 8.0 @@ -39,8 +39,8 @@ body: | ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1090519040 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 3212836864 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 3238002688 - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: $sgpr1 = COPY [[S_MOV_B32_1]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr1 = PRED_COPY [[S_MOV_B32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s32) = G_FCONSTANT float 1.0 %1:sgpr(s32) = G_FCONSTANT float 8.0 @@ -73,8 +73,8 @@ body: | ; GCN-NEXT: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1076101120, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] - ; GCN-NEXT: $vgpr2_vgpr3 = COPY [[REG_SEQUENCE1]] + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[REG_SEQUENCE]] + ; GCN-NEXT: $vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:vgpr(s64) = G_FCONSTANT double 1.0 %1:vgpr(s64) = G_FCONSTANT double 8.0 @@ -103,8 +103,8 @@ body: | ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1071382528 ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], 
%subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_MOV_B64_]] - ; GCN-NEXT: $sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_MOV_B64_]] + ; GCN-NEXT: $sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_1]], implicit [[REG_SEQUENCE1]] %0:sgpr(s64) = G_FCONSTANT double 1.0 %1:sgpr(s64) = G_FCONSTANT double 8.0 @@ -128,8 +128,8 @@ body: | ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] - ; GCN-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s16) = G_FCONSTANT half 1.0 %1:vgpr(s16) = G_FCONSTANT half 8.0 @@ -156,12 +156,12 @@ body: | ; GCN-LABEL: name: fconstant_s_s16 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 - ; GCN-NEXT: $sgpr0 = COPY [[COPY]] - ; GCN-NEXT: $sgpr1 = COPY [[COPY1]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s16) = G_FCONSTANT half 1.0 %1:sgpr(s16) = G_FCONSTANT half 8.0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir index ccda3ddb9bf05..6980573b499fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: fexp2_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FEXP2 %0 S_ENDPGM 0, implicit %1 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: fexp2_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FEXP2 %0 S_ENDPGM 0, 
implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir index b9251f2efe820..545f0af606520 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -17,8 +17,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FFLOOR:%[0-9]+]]:sreg_32(s16) = G_FFLOOR [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FFLOOR]](s16) - ; VI-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FFLOOR]](s16) + ; VI-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FFLOOR %1 @@ -39,9 +39,9 @@ body: | ; VI-LABEL: name: ffloor_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -62,9 +62,9 @@ body: | ; VI-LABEL: name: ffloor_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FFLOOR %1 @@ -93,9 +93,9 @@ body: | ; VI-LABEL: name: ffloor_fneg_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir index 914edda54f5f5..a8a7f8b06fe9e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: ffloor_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY 
[[V_FLOOR_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: ffloor_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FFLOOR %1 @@ -77,9 +77,9 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %2:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_FLOOR_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FFLOOR %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir index 84734fc0622a0..40d2501d617bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: ffloor_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -51,9 +51,9 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %2:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FLOOR_F64_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
$vgpr0_vgpr1 = PRED_COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 %2:vgpr(s64) = G_FFLOOR %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir index bf451f04c45de..310980e634390 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -17,27 +17,27 @@ body: | ; GFX6-LABEL: name: fma_f32 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %3:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %3 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -59,27 +59,27 @@ body: | ; GFX6-LABEL: name: fma_f32_fneg_src0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - 
; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src0 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -102,27 +102,27 @@ body: | ; GFX6-LABEL: name: fma_f32_fneg_src1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src1 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, 
implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -145,27 +145,27 @@ body: | ; GFX6-LABEL: name: fma_f32_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 
[[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -188,27 +188,27 @@ body: | ; GFX6-LABEL: name: fma_f32_fabs_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_fabs_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %4:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %4 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -231,27 +231,27 @@ body: | ; GFX6-LABEL: name: fma_f32_copy_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %5 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX9-DL-LABEL: name: fma_f32_copy_fneg_src2 ; GFX9-DL: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DL-NEXT: {{ $}} - ; GFX9-DL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-DL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-DL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX9-DL-NEXT: S_ENDPGM 0, implicit %5 + ; GFX9-DL-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-DL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-DL-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-DL-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX9-DL-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] ; GFX10-LABEL: name: fma_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: %5:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: S_ENDPGM 0, implicit %5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_FMA_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir index 10a5dbe9a6b89..7c42ce8af35c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir @@ -15,18 +15,18 @@ body: | ; GFX6-LABEL: name: fmad_f32 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, 
[[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] ; GFX10-LABEL: name: fmad_f32 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -49,18 +49,18 @@ body: | ; GFX6-LABEL: name: fmad_f32_fneg_src0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fneg_src0 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -84,18 +84,18 @@ body: | ; GFX6-LABEL: name: fmad_f32_fneg_src1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] 
; GFX10-LABEL: name: fmad_f32_fneg_src1 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -119,18 +119,18 @@ body: | ; GFX6-LABEL: name: fmad_f32_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -154,18 +154,18 @@ body: | ; GFX6-LABEL: name: fmad_f32_fabs_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_fabs_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 2, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -189,18 +189,18 @@ body: | ; GFX6-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] ; GFX10-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 1, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir index f3021ca75aed3..7236b19e9932e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir @@ -16,23 +16,23 @@ body: | ; GFX7-LABEL: name: fmaxnum_ieee_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, 
implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -86,23 +86,23 @@ body: | ; GFX7-LABEL: name: fmaxnum_ieee_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir index ec50b56bebc76..f1e225a4c2c9a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -16,17 +16,17 @@ body: | ; CHECK-LABEL: name: fmaxnum_ieee_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ body: | ; CHECK-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir index 839351fd7f0e9..81782a78f7a0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir @@ -15,10 +15,10 @@ body: | ; GFX9-LABEL: name: 
fmaxnum_ieee_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMAXNUM_IEEE %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir index 467c2914d2b4b..a421cce96ba62 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir @@ -17,23 +17,23 @@ body: | ; GFX7-LABEL: name: fmaxnum_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept 
V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -85,23 +85,23 @@ body: | ; GFX7-LABEL: name: fmaxnum_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; 
GFX7-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MAX_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MAX_F64_e64_]], implicit [[V_MAX_F64_e64_1]], implicit [[V_MAX_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir index 27f275b58eceb..6501f94bcd04f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -16,17 +16,17 @@ body: | ; CHECK-LABEL: name: fmaxnum_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ body: | ; CHECK-LABEL: name: fmaxnum_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX11-LABEL: name: fmaxnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir index e54bba8b9b5a2..fd78740108c40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir @@ -17,10 +17,10 @@ body: | ; GFX9-LABEL: name: fmaxnum_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMAXNUM %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir index 3728907c43e7f..3a15413f8b08e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir @@ -16,23 +16,23 @@ body: | ; GFX7-LABEL: name: fminnum_ieee_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, 
implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -86,23 +86,23 @@ body: | ; GFX7-LABEL: name: fminnum_ieee_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; 
GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir index 17687956044cc..74a44201a40d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -16,17 +16,17 @@ body: | ; CHECK-LABEL: name: fminnum_ieee_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ body: | ; CHECK-LABEL: name: fminnum_ieee_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir index 63626318a1bbc..908f421f481a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir @@ -15,10 +15,10 @@ body: | ; GFX9-LABEL: name: 
fminnum_ieee_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMINNUM_IEEE %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir index bf9752b512632..c833cf2ce197c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir @@ -17,23 +17,23 @@ body: | ; GFX7-LABEL: name: fminnum_f32_f64_ieee_mode_on ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept 
V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -85,23 +85,23 @@ body: | ; GFX7-LABEL: name: fminnum_f32_f64_ieee_mode_off ; GFX7: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7-NEXT: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GFX7-NEXT: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec - ; GFX7-NEXT: S_ENDPGM 0, implicit %10, implicit %11, implicit %12 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; 
GFX7-NEXT: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MIN_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-NEXT: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[PRED_COPY5]], 0, [[PRED_COPY6]], 0, 0, implicit $mode, implicit $exec + ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_MIN_F64_e64_]], implicit [[V_MIN_F64_e64_1]], implicit [[V_MIN_F64_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir index 34b50f60f7ed7..ed5fbbf8cab66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -16,17 +16,17 @@ body: | ; CHECK-LABEL: name: fminnum_f16_vv ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -47,17 +47,17 @@ body: | ; CHECK-LABEL: name: fminnum_f16_v_fneg_v ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] ; GFX11-LABEL: name: fminnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_MIN_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[PRED_COPY]], 1, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir index 2750ca573b579..4d632141db0e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir @@ -15,10 +15,10 @@ body: | ; GFX9-LABEL: name: fminnum_v2f16_vv ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMINNUM %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir index 3b83b9e3e98b5..157a765ec33c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -13,16 +13,16 @@ body: | ; GCN-LABEL: name: fmul_f32 ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], 
%4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY3]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -54,13 +54,13 @@ body: | ; GCN-LABEL: name: fmul_f64 ; GCN: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: %4:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %5:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %4, implicit %5, implicit %6 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F64_e64_]], implicit [[V_MUL_F64_e64_1]], implicit [[V_MUL_F64_e64_2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -90,12 +90,12 @@ body: | ; GCN-LABEL: name: fmul_f16 ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, 
[[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %7, implicit %8, implicit %9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -129,28 +129,28 @@ body: | ; GCN-LABEL: name: fmul_modifiers_f32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %11:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: 
(store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 2, [[PRED_COPY]], 2, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 1, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 3, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[PRED_COPY]], 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_3]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_4]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_5]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_6]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_7]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_8]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[V_MUL_F32_e64_9]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(p1) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir index 96771e2ee4248..c8fc273a98a1f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir @@ -15,10 +15,10 @@ body: | ; GFX9-LABEL: name: fmul_v2f16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; 
GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[PRED_COPY]], 8, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FMUL %0, %1 @@ -37,10 +37,10 @@ body: | ; GFX9-LABEL: name: fmul_v2f16_fneg_v_fneg_v ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: %4:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 11, [[PRED_COPY]], 11, [[PRED_COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_FNEG %0 @@ -61,15 +61,15 @@ body: | ; GFX9-LABEL: name: fmul_v2f16_fneg_lo_v_v ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[V_XOR_B32_e64_]], implicit $exec - ; GFX9-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[COPY2]], 16, [[V_AND_B32_e32_]], implicit $exec - ; GFX9-NEXT: %7:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GFX9-NEXT: S_ENDPGM 0, implicit %7 + ; GFX9-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[PRED_COPY2]], 16, [[V_AND_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[V_LSHL_OR_B32_e64_]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir index acf25f1060d2f..62a564e59e638 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -23,31 +23,31 @@ body: | ; SI-LABEL: name: fneg_s32_ss ; SI: 
liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX10-LABEL: name: fneg_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 $sgpr0 = COPY %1 @@ -71,31 +71,31 @@ body: | ; SI-LABEL: name: fneg_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -120,25 +120,25 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; VI-LABEL: name: fneg_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; GFX9-LABEL: name: fneg_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) ; GFX10-LABEL: name: fneg_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FNEG]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FNEG %0 $vgpr0 = COPY %1 @@ -162,31 +162,31 @@ body: | ; SI-LABEL: name: fneg_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; 
GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX10-LABEL: name: fneg_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -212,31 +212,31 @@ body: | ; SI-LABEL: name: fneg_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -267,32 +267,32 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: 
[[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fneg_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fneg_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fneg_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[FNEG]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -318,31 +318,31 @@ body: | ; SI-LABEL: name: fneg_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; VI-LABEL: name: fneg_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; GFX9-LABEL: name: fneg_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] ; 
GFX10-LABEL: name: fneg_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_XOR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FNEG %0 $sgpr0 = COPY %1 @@ -366,31 +366,31 @@ body: | ; SI-LABEL: name: fneg_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -415,25 +415,25 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; VI-LABEL: name: fneg_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; VI-NEXT: 
$vgpr0 = COPY [[FNEG]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; GFX9-LABEL: name: fneg_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) ; GFX10-LABEL: name: fneg_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] - ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FNEG]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FNEG %0 $vgpr0 = COPY %1 @@ -460,42 +460,42 @@ body: | ; SI-LABEL: name: fneg_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FNEG %0 @@ -523,42 +523,42 @@ body: | ; SI-LABEL: name: fneg_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit 
[[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 @@ -627,30 +627,30 @@ body: | ; SI-LABEL: name: fneg_fabs_s32_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_s32_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_s32_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 
[[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_s32_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FABS %0 @@ -676,30 +676,30 @@ body: | ; SI-LABEL: name: fneg_fabs_s32_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 @@ -779,31 +779,31 @@ body: | ; SI-LABEL: name: fneg_fabs_s16_ss ; SI: liveins: $sgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY 
[[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FABS %1 @@ -831,35 +831,35 @@ body: | ; SI-LABEL: name: fneg_fabs_s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; SI-NEXT: $vgpr0 = COPY [[COPY1]] + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; VI-LABEL: name: fneg_fabs_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; VI-NEXT: $vgpr0 = COPY [[COPY1]] + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; GFX9-LABEL: name: fneg_fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 
= COPY [[V_OR_B32_e64_]] - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] ; GFX10-LABEL: name: fneg_fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]] + ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_OR_B32_e64_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FABS %1 @@ -893,8 +893,8 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; VI-LABEL: name: fneg_fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -902,8 +902,8 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; VI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX9-LABEL: name: fneg_fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -911,8 +911,8 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; GFX10-LABEL: name: fneg_fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -920,8 +920,8 @@ body: | ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FNEG1]](s16) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FNEG %1 @@ -948,31 +948,31 @@ body: | ; SI-LABEL: name: fneg_fabs_v2s16_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = 
S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; VI-LABEL: name: fneg_fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX9-LABEL: name: fneg_fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] ; GFX10-LABEL: name: fneg_fabs_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_OR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = G_FABS %0 %2:sgpr(<2 x s16>) = G_FNEG %1 @@ -997,31 +997,31 @@ body: | ; SI-LABEL: name: fneg_fabs_v2s16_vv ; SI: liveins: $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; VI-LABEL: name: fneg_fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: fneg_fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], 
[[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: fneg_fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG %0 @@ -1051,7 +1051,7 @@ body: | ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; SI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; VI-LABEL: name: fneg_fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -1059,7 +1059,7 @@ body: | ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; GFX9-LABEL: name: fneg_fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -1067,7 +1067,7 @@ body: | ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) ; GFX10-LABEL: name: fneg_fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -1075,7 +1075,7 @@ body: | ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec - ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[V_XOR_B32_e64_]](<2 x s16>) %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = G_FABS %0 %2:vgpr(<2 x s16>) = G_FNEG %1 @@ -1103,42 +1103,42 @@ body: | ; SI-LABEL: name: fneg_fabs_s64_ss ; SI: liveins: $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: 
[[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, 
[[S_OR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_FABS %0 @@ -1167,42 +1167,42 @@ body: | ; SI-LABEL: name: fneg_fabs_s64_vv ; SI: liveins: $vgpr0_vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; VI-LABEL: name: fneg_fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: fneg_fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: fneg_fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index 7d2c3f4688dd4..ceb37b26e116a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -16,21 +16,21 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -49,21 +49,21 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %1:vgpr_32 = nofpexcept 
V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %1 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FPTOSI %0 $vgpr0 = COPY %1 @@ -82,21 +82,21 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] ; GFX11-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -116,24 +116,24 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: 
{{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -153,24 +153,24 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; 
GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -190,30 +190,30 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 
[[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -234,24 +234,24 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s1_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -272,24 +272,24 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s1_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOSI %1 @@ -310,30 +310,30 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] ; GFX11-LABEL: name: fptosi_s16_to_s1_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index 09a886255048c..22765034f105d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -16,33 +16,33 @@ body: | ; GCN-LABEL: name: fptoui ; GCN: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GCN-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; VI-LABEL: name: fptoui ; VI: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, 
implicit $mode, implicit $exec - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; VI-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX11-LABEL: name: fptoui ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -72,24 +72,24 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = 
nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -109,24 +109,24 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 
= nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -146,30 +146,30 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: $vgpr0 = COPY %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s32_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 @@ -190,24 
+190,24 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s1_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -228,24 +228,24 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s1_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - 
; VI-NEXT: S_ENDPGM 0, implicit %2 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_FPTOUI %1 @@ -266,30 +266,30 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit %3 + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; VI-NEXT: S_ENDPGM 0, implicit %3 + ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec + ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] ; GFX11-LABEL: name: fptoui_s16_to_s1_fneg_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ 
$}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %3 + ; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_t16_e64_]], implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index a73aaa6f99d93..5b73d6b5ec13a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -15,20 +15,20 @@ body: | ; CHECK-LABEL: name: fract_f64_neg ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY3]], 1, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_FRACT_F64_e64_]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 @@ -65,20 +65,20 @@ body: | ; CHECK-LABEL: name: fract_f64_neg_abs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; CHECK-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[PRED_COPY3]], 3, [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_FRACT_F64_e64 0, [[V_ADD_F64_e64_]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], [[V_FRACT_F64_e64_]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir index 6868705191d42..ce250882af198 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frame-index.mir @@ -13,7 +13,7 @@ body: | bb.0: ; GCN-LABEL: name: frame_index_s ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 - ; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_MOV_B32_]] %0:sgpr(p5) = G_FRAME_INDEX %stack.0 $sgpr0 = COPY %0 @@ -31,7 +31,7 @@ body: | bb.0: ; GCN-LABEL: name: frame_index_v ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_MOV_B32_e32_]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 $vgpr0 = COPY %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir index d20fd4ed067e1..d7eaf46c44cef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir @@ -16,13 +16,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0(s32) %2:vgpr(s1) = G_FREEZE %1 @@ -43,13 +43,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s1_vgpr_to_agpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s1_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0(s32) %2:vgpr(s1) = G_FREEZE %1 @@ -70,19 +70,19 @@ body: | ; GFX6-LABEL: name: test_freeze_s1_vcc ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; GFX6-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; GFX6-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX10-LABEL: name: test_freeze_s1_vcc ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; GFX10-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32_xm0_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; GFX10-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1 @@ -103,13 +103,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s16_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0(s32) %2:vgpr(s16) = G_FREEZE %1 @@ -130,13 +130,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_vgpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -155,13 +155,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_sgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $sgpr0 = COPY %1(s32) @@ -180,13 +180,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_vgpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -205,13 +205,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_vgpr_to_agpr ; GFX6: liveins: $vgpr0 ; 
GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_vgpr_to_agpr ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -230,13 +230,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_sgpr_to_agpr ; GFX6: liveins: $sgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_sgpr_to_agpr ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -255,13 +255,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_agpr_to_vgpr ; GFX6: liveins: $agpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_agpr_to_vgpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:agpr(s32) = COPY $agpr0 %1:agpr(s32) = G_FREEZE %0 $vgpr0 = COPY %1(s32) @@ -280,13 +280,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s32_agpr_to_agpr ; GFX6: liveins: $agpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX6-NEXT: $agpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX6-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s32_agpr_to_agpr ; GFX10: liveins: $agpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY $agpr0 - ; GFX10-NEXT: $agpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:agpr_32 = PRED_COPY $agpr0 + ; GFX10-NEXT: $agpr0 = PRED_COPY [[PRED_COPY]] %0:agpr(s32) = COPY $agpr0 %1:agpr(s32) = G_FREEZE %0 $agpr0 = COPY %1(s32) @@ -305,13 +305,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(s64) @@ -330,13 +330,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s128 ; GFX6: 
liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s128 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s128) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(s128) @@ -355,13 +355,13 @@ body: | ; GFX6-LABEL: name: test_freeze_256 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_256 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s256) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(s256) @@ -380,13 +380,13 @@ body: | ; GFX6-LABEL: name: test_freeze_s512 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_s512 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(s512) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(s512) @@ -405,13 +405,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(<2 x s32>) @@ -430,13 +430,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v3s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v3s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(<3 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2 = COPY %1(<3 x s32>) @@ -455,13 +455,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v4s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v4s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<4 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<4 x s32>) @@ -480,13 +480,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v5s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v5s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[PRED_COPY]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:vgpr(<5 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1(<5 x s32>) @@ -505,13 +505,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v8s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v8s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = PRED_COPY [[PRED_COPY]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(<8 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1(<8 x s32>) @@ -530,13 +530,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v16s32 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v16s32 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[PRED_COPY]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(<16 x s32>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1(<16 x s32>) @@ -555,13 +555,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v2s16 ; GFX6: 
liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FREEZE %0 $vgpr0 = COPY %1(<2 x s16>) @@ -580,13 +580,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v4s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(<4 x s16>) @@ -605,13 +605,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v6s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v6s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY]] %0:vgpr(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(<6 x s16>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2 = COPY %1(<6 x s16>) @@ -630,13 +630,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v8s16 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v8s16 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<8 x s16>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<8 x s16>) @@ -655,13 +655,13 @@ body: | ; GFX6-LABEL: name: test_freeze_v2s64 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 
= PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[PRED_COPY]] %0:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(<2 x s64>) = G_FREEZE %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1(<2 x s64>) @@ -680,13 +680,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p0 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p0 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p0) @@ -705,13 +705,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p1) @@ -730,13 +730,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(p2) = G_FREEZE %0 $vgpr0 = COPY %1(p2) @@ -755,13 +755,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p3 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_FREEZE %0 $vgpr0 = COPY %1(p3) @@ -780,13 +780,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p4) = COPY $vgpr0_vgpr1 %1:vgpr(p4) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p4) @@ -805,13 +805,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p5 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p5 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = G_FREEZE %0 $vgpr0 = COPY %1(p5) @@ -830,13 +830,13 @@ body: | ; GFX6-LABEL: name: test_freeze_p999 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] ; GFX10-LABEL: name: test_freeze_p999 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_FREEZE %0 $vgpr0_vgpr1 = COPY %1(p999) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir index aaed64f95b08c..e9cdec592ef7e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir @@ -15,9 +15,9 @@ body: | ; GCN-LABEL: name: frint_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FRINT %0 $vgpr0 = COPY %1 @@ -36,9 +36,9 @@ body: | ; GCN-LABEL: name: frint_s32_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FRINT %0 $vgpr0 = COPY %1 @@ -57,9 
+57,9 @@ body: | ; GCN-LABEL: name: frint_fneg_s32_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FRINT %1 @@ -79,9 +79,9 @@ body: | ; GCN-LABEL: name: frint_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %1:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FRINT %0 $vgpr0_vgpr1 = COPY %1 @@ -100,9 +100,9 @@ body: | ; GCN-LABEL: name: frint_s64_fneg_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 %2:vgpr(s64) = G_FRINT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir index 0e6a3ccacd168..cf88ec6c2b848 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir @@ -17,8 +17,8 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GCN-NEXT: [[FRINT:%[0-9]+]]:sreg_32(s16) = G_FRINT [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FRINT]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32(s32) = PRED_COPY [[FRINT]](s16) + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY]](s32) %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s16) = G_FRINT %1 @@ -39,9 +39,9 @@ body: | ; GCN-LABEL: name: frint_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FRINT %1 @@ -62,9 +62,9 @@ body: | ; GCN-LABEL: name: frint_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: 
%2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FRINT %1 @@ -85,9 +85,9 @@ body: | ; GCN-LABEL: name: frint_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir index f2e1e91fc7038..2b08953c7250b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir @@ -18,10 +18,10 @@ body: | ; GCN-LABEL: name: fshr_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir index 54557ddb29840..4f7f84d1b9fc7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir @@ -15,38 +15,38 @@ body: | ; WAVE64-LABEL: name: i1_vcc_to_vcc_copy ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; WAVE64-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], 
[[COPY5]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY5]], implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 ; WAVE32-LABEL: name: i1_vcc_to_vcc_copy ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY5]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY5]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[DEF]] + ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -78,46 +78,46 @@ body: | ; WAVE64-LABEL: name: i1_sgpr_to_vcc_copy ; WAVE64: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; WAVE64-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 ; WAVE32-LABEL: name: i1_sgpr_to_vcc_copy ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; WAVE32-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; WAVE32-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[PRED_COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec + ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[PRED_COPY5]], implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY4]], 0, [[PRED_COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] + ; WAVE32-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[PRED_COPY6]], [[PRED_COPY7]], -1, 0, 15, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir index 8854fe0af7c85..ac18047d8c1d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -9,21 +9,21 @@ legalized: true regBankSelected: true # GCN: name: icmp_s32_s_mix -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 -# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 -# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2 -# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3 -# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4 -# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5 -# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6 -# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 +# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 +# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 +# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 +# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 +# GCN: 
[[SGPR5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 +# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 +# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 # GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc -# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32 = COPY $scc +# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32 = PRED_COPY $scc # GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc -# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32 = COPY $scc -# GCN: $scc = COPY [[COND0]] +# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32 = PRED_COPY $scc +# GCN: $scc = PRED_COPY [[COND0]] # GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc -# GCN: $scc = COPY [[COND1]] +# GCN: $scc = PRED_COPY [[COND1]] # GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc body: | @@ -123,14 +123,14 @@ legalized: true regBankSelected: true # GCN-LABEL: name: icmp_s32_v_mix -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 -# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 -# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 -# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 -# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 -# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 -# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 +# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 +# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 +# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 +# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 +# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 +# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr9 # GCN: [[COND0:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]] # GCN: [[COND1:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]] # GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]] @@ -220,8 +220,8 @@ legalized: true regBankSelected: true # GCN-LABEL: name: icmp_s32_vv -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 # GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]] body: | @@ -244,8 +244,8 @@ legalized: true regBankSelected: true # GCN-LABEL: name: icmp_s32_vs -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]] body: | @@ -268,8 +268,8 @@ legalized: true regBankSelected: true # GCN-LABEL: name: icmp_s32_sv -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]] body: | @@ -292,8 +292,8 @@ legalized: true regBankSelected: true # GCN-LABEL: name: icmp_s32_or_vcc -# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 +# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 # GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]] body: | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir index 3c2c37513bba5..6237d33838092 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir 
@@ -16,23 +16,23 @@ body: | ; WAVE64-LABEL: name: icmp_eq_s16_sv ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -55,23 +55,23 @@ body: | ; WAVE64-LABEL: name: icmp_eq_s16_vs ; WAVE64: liveins: $sgpr0, $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; 
GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -94,23 +94,23 @@ body: | ; WAVE64-LABEL: name: icmp_eq_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -133,23 +133,23 @@ body: | ; WAVE64-LABEL: name: icmp_ne_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ne_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; GFX11-LABEL: name: icmp_ne_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -172,23 +172,23 @@ body: | ; WAVE64-LABEL: name: icmp_slt_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; WAVE32-LABEL: name: icmp_slt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; GFX11-LABEL: name: icmp_slt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -211,23 +211,23 @@ body: | ; WAVE64-LABEL: name: icmp_sle_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: 
[[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; WAVE32-LABEL: name: icmp_sle_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; GFX11-LABEL: name: icmp_sle_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -250,23 +250,23 @@ body: | ; WAVE64-LABEL: name: icmp_ult_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; WAVE32-LABEL: name: icmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; GFX11-LABEL: name: icmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY 
$vgpr1 @@ -289,23 +289,23 @@ body: | ; WAVE64-LABEL: name: icmp_ule_s16_vv ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; GFX11-LABEL: name: icmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -314,4 +314,3 @@ body: | %4:vcc(s1) = G_ICMP intpred(ule), %2, %3 S_ENDPGM 0, implicit %4 ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir index 2eeb93d46b048..0443423e2526d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s64.mir @@ -31,11 +31,11 @@ body: | ; GFX8-LABEL: name: icmp_eq_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -62,11 +62,11 @@ body: | ; GFX8-LABEL: name: icmp_ne_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_LG_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_LG_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_ne_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -123,20 +123,20 @@ body: | ; GFX8-LABEL: name: icmp_eq_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -159,20 +159,20 @@ body: | ; GFX8-LABEL: name: icmp_ne_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_NE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ne_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_NE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_NE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -195,20 +195,20 @@ body: | ; GFX8-LABEL: name: icmp_sgt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_I64_e64_]] + ; GFX8-NEXT: 
[[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sgt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -231,20 +231,20 @@ body: | ; GFX8-LABEL: name: icmp_sge_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -267,20 +267,20 @@ body: | ; GFX8-LABEL: name: icmp_slt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; 
GFX8-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_slt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -303,20 +303,20 @@ body: | ; GFX8-LABEL: name: icmp_sle_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_I64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_sle_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_I64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: 
[[V_CMP_LE_I64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_I64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -339,20 +339,20 @@ body: | ; GFX8-LABEL: name: icmp_ugt_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ugt_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GT_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GT_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -375,20 +375,20 @@ body: | ; GFX8-LABEL: name: icmp_uge_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: 
icmp_uge_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_GE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_GE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_GE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -411,20 +411,20 @@ body: | ; GFX8-LABEL: name: icmp_ult_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ult_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LT_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LT_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LT_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -447,20 +447,20 @@ body: | ; GFX8-LABEL: name: icmp_ule_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[V_CMP_LE_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_ule_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_LE_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_LE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_LE_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -483,11 +483,11 @@ body: | ; GFX8-LABEL: name: icmp_eq_p0_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p0_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -513,11 +513,11 @@ body: | ; GFX8-LABEL: name: icmp_eq_p1_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p1_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -544,11 +544,11 @@ body: | ; GFX8-LABEL: name: icmp_eq_p999_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY 
$sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: S_CMP_EQ_U64 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: S_ENDPGM 0, implicit [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: S_CMP_EQ_U64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: S_ENDPGM 0, implicit [[PRED_COPY2]] ; GFX6-LABEL: name: icmp_eq_p999_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -575,20 +575,20 @@ body: | ; GFX8-LABEL: name: icmp_eq_p0_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p0_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 @@ -611,20 +611,20 @@ body: | ; GFX8-LABEL: name: icmp_eq_p1_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; 
GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p1_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 @@ -647,20 +647,20 @@ body: | ; GFX8-LABEL: name: icmp_eq_p999_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX8-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] ; GFX6-LABEL: name: icmp_eq_p999_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CMP_EQ_U64_e64_]] - ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_CMP_EQ_U64_e64_]] + ; GFX6-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e32_]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir index 
3d0d198fc7824..8578177608db8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -16,20 +16,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v2s32 ; MOVREL: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v2s32 ; GPRIDX: liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -50,20 +50,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v3s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v3s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; 
GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr3 @@ -84,20 +84,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr3 @@ -118,20 +118,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v5s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; MOVREL-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v5s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] %0:sgpr(<5 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s32) = COPY $sgpr5 @@ -152,20 +152,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = 
S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -186,20 +186,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v16s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16, $sgpr17 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr16 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr17 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v16s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16, $sgpr17 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr16 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr17 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr16 @@ -220,20 +220,20 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40, $sgpr41 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr41 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40, $sgpr41 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr40 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr41 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 @@ -254,20 +254,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v2s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: 
[[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v2s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -288,20 +288,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v4s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr10 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v4s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr10 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr8_sgpr9 @@ -322,20 +322,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s64_v8s64 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr16_sgpr17 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr18 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v8s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17, $sgpr18 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr16_sgpr17 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr18 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s64) = COPY $sgpr16_sgpr17 @@ -356,20 +356,20 @@ body: | ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 ; MOVREL: liveins: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr42 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr40_sgpr41 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr42 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr42 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr40_sgpr41 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr42 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[PRED_COPY]], [[PRED_COPY1]], 4, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s64) = COPY $sgpr40_sgpr41 @@ -390,19 +390,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; MOVREL: liveins: 
$vgpr0_vgpr1, $vgpr2, $sgpr3 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; GPRIDX: liveins: $vgpr0_vgpr1, $vgpr2, $sgpr3 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -423,19 +423,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; 
GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s32) = COPY $vgpr3 @@ -456,19 +456,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s32) = COPY $vgpr3 @@ -489,19 +489,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5, $sgpr6 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; MOVREL-NEXT: 
$m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, $vgpr5, $sgpr6 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_160 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:vgpr(s32) = COPY $vgpr5 @@ -522,19 +522,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 3, implicit-def $m0, 
implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -555,19 +555,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -590,23 +590,23 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], 
[[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 @@ -629,20 +629,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; MOVREL-NEXT: $m0 = COPY [[COPY2]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; MOVREL-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX-NEXT: $m0 = COPY [[COPY2]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = 
S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[PRED_COPY2]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 11, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 @@ -665,24 +665,24 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; MOVREL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; MOVREL-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, $sgpr9 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; GPRIDX-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr9 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX-NEXT: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_ADD_I32_]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) 
= COPY $sgpr8 @@ -707,20 +707,20 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; GPRIDX-NEXT: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0 ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 @@ -741,19 +741,19 @@ body: | ; MOVREL-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; MOVREL-NEXT: {{ $}} - ; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; MOVREL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; MOVREL-NEXT: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL-NEXT: $m0 = PRED_COPY [[S_MOV_B32_]] + ; MOVREL-NEXT: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], 3, implicit $m0, implicit $exec ; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GPRIDX-NEXT: {{ $}} - ; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GPRIDX-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX-NEXT: 
[[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX-NEXT: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[PRED_COPY]], [[PRED_COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec ; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index b563d4e1a9207..fa3eefe8ad9d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -28,7 +28,7 @@ body: | ; CHECK-NEXT: [[INSERT_SUBREG13:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13 ; CHECK-NEXT: [[INSERT_SUBREG14:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14 ; CHECK-NEXT: [[INSERT_SUBREG15:%[0-9]+]]:sgpr_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]] + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[INSERT_SUBREG15]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_IMPLICIT_DEF @@ -64,9 +64,9 @@ body: | ; CHECK-LABEL: name: insert_v_s64_v_s32_0 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -86,9 +86,9 @@ body: | ; CHECK-LABEL: name: insert_v_s64_v_s32_32 ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -108,9 +108,9 @@ body: | ; CHECK-LABEL: name: insert_s_s64_s_s32_0 ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -130,9 +130,9 @@ body: | ; CHECK-LABEL: name: insert_s_s64_s_s32_32 ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -152,9 +152,9 @@ body: | ; CHECK-LABEL: name: insert_s_s64_v_s32_32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -174,9 +174,9 @@ body: | ; CHECK-LABEL: name: insert_v_s64_s_s32_32 ; CHECK: liveins: $vgpr0_vgpr1, $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -196,9 +196,9 @@ body: | ; CHECK-LABEL: name: insert_v_s96_v_s64_0 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -218,9 +218,9 @@ body: | ; CHECK-LABEL: name: insert_v_s96_v_s64_32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = COPY $vgpr3_vgpr4 @@ -240,9 +240,9 @@ body: | ; CHECK-LABEL: name: insert_s_s96_s_s64_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -262,9 +262,9 @@ body: | ; CHECK-LABEL: name: insert_s_s96_s_s64_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -284,9 +284,9 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s64_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -321,9 +321,9 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s64_64 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 
@@ -343,9 +343,9 @@ body: | ; CHECK-LABEL: name: insert_s_v256_v_s64_96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr8_vgpr9 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s64) = COPY $vgpr8_vgpr9 @@ -365,9 +365,9 @@ body: | ; CHECK-LABEL: name: insert_s_s256_s_s64_128 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -402,9 +402,9 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s96_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -424,9 +424,9 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s96_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -446,9 +446,9 @@ body: | ; 
CHECK-LABEL: name: insert_s_s160_s_s96_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -468,9 +468,9 @@ body: | ; CHECK-LABEL: name: insert_s_s160_s_s96_32 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -490,9 +490,9 @@ body: | ; CHECK-LABEL: name: insert_s_s160_s_s96_64 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -513,9 +513,9 @@ body: | ; CHECK-LABEL: name: insert_s_s256_s_s128_0 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub0_sub1_sub2_sub3 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 @@ -536,9 +536,9 @@ body: | ; CHECK-LABEL: name: insert_v_s256_v_s128_32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub1_sub2_sub3_sub4 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -559,9 +559,9 @@ body: | ; CHECK-LABEL: name: insert_v_s256_v_s128_64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub2_sub3_sub4_sub5 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -582,9 +582,9 @@ body: | ; CHECK-LABEL: name: insert_v_s256_v_s128_96 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub3_sub4_sub5_sub6 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -605,9 +605,9 @@ body: | ; CHECK-LABEL: name: insert_v_s256_v_s128_128 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[PRED_COPY]], [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir index 8bfd92a85afa2..e136b12a02d2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 $vgpr0 = COPY %1 @@ -35,9 +35,9 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %1:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 $vgpr0 = COPY %1 @@ -56,9 +56,9 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_TRUNC_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: %1:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_TRUNC_F64_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $vgpr0_vgpr1 = PRED_COPY [[V_TRUNC_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir index 3f47959707476..187a7ec39e716 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: intrinsic_trunc_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: intrinsic_trunc_s16_vs ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_INTRINSIC_TRUNC %1 @@ -60,9 +60,9 @@ body: | ; GCN-LABEL: name: intrinsic_trunc_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F16_e64 1, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_FNEG %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir index 81fdf2fe0e78a..59ab3aba6f337 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir @@ -7,10 +7,10 @@ legalized: true regBankSelected: true # GCN-LABEL: name: inttoptr -# GCN: [[S64:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 -# GCN: [[V64:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +# GCN: [[S64:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 +# GCN: [[V64:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 # GCN: [[VAL:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S64]], 0, 0 -# GCN: [[V_VAL:%[0-9]+]]:vgpr_32 = COPY [[VAL]] +# GCN: [[V_VAL:%[0-9]+]]:vgpr_32 = PRED_COPY [[VAL]] # GCN: FLAT_STORE_DWORD [[V64]], [[V_VAL]] # diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index f7b50675cda5e..6101b061c579a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -18,27 +18,27 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - 
; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -61,25 +61,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX11-LABEL: name: 
load_atomic_flat_v2s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -102,25 +102,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) - ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -141,27 +141,27 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; 
GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -184,25 +184,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -225,25 +225,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; 
GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -266,25 +266,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -307,25 +307,25 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY 
[[LOAD]](p0) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -346,67 +346,67 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], 
[[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = 
PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p0) = G_PTR_ADD %0, %1 @@ -429,47 +429,47 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 
[[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p0) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index 855745347e557..11aa7883b6e6c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -20,43 +20,43 @@ body: | ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], 
%subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -79,31 +79,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: 
[[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -126,31 +126,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_global_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -171,43 +171,43 @@ body: | ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY 
[[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -230,31 +230,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -277,31 +277,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - 
; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -324,31 +324,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_global_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -371,31 +371,31 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX7-NEXT: 
$vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-FLAT-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_global_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -416,16 +416,16 @@ body: | ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -433,20 +433,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], 
%subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -454,35 +454,35 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -505,63 +505,63 @@ body: | ; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -584,16 +584,16 @@ body: | ; GFX6-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -601,20 +601,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -622,35 +622,35 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: 
$vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir index 2fe1c672829f0..edb4127808501 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -19,23 +19,23 @@ body: | ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -59,20 +59,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -96,20 +96,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -130,23 +130,23 @@ body: | ; GFX6-LABEL: name: load_atomic_local_s64_seq_cst ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = 
S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -170,20 +170,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x s32>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -207,20 +207,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) 
%0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -244,20 +244,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -281,20 +281,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p0) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -315,25 +315,25 @@ body: | ; GFX6-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: 
load_atomic_local_s32_seq_cst_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65535 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index 4587ac9334ff2..31d79c9081f38 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -20,27 +20,27 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: 
{{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -62,27 +62,27 @@ body: | ; GFX6-LABEL: name: load_constant_v2s16_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -103,27 +103,27 @@ body: | ; GFX6-LABEL: name: load_constant_v2s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; 
GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v2s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -143,27 +143,27 @@ body: | ; GFX6-LABEL: name: load_constant_v2s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: 
load_constant_v2s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -183,27 +183,27 @@ body: | ; GFX6-LABEL: name: load_constant_v4s16_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = 
S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -224,27 +224,27 @@ body: | ; GFX6-LABEL: name: load_constant_v4s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -265,27 +265,27 @@ body: | ; GFX6-LABEL: name: load_constant_s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec 
= S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -306,27 +306,27 @@ body: | ; GFX6-LABEL: name: load_constant_s64_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY 
[[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -347,27 +347,27 @@ body: | ; GFX6-LABEL: name: load_constant_v2s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = 
S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -390,25 +390,25 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_constant_v2p1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_constant_v2p1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_constant_v2p1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -431,25 +431,25 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY 
$sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[LOAD]](s128) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -470,27 +470,27 @@ body: | ; GFX6-LABEL: name: load_constant_p3_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_p3_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_p3_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_p3_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (p3), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -511,27 +511,27 @@ body: | ; GFX6-LABEL: name: load_constant_p4_from_8 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_p4_from_8 ; GFX7: liveins: 
$sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_p4_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_p4_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (p4), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -554,25 +554,25 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_constant_p999_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_constant_p999_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_constant_p999_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](p999) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -595,25 +595,25 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 
x p3>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_constant_v2p3 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_constant_v2p3 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_constant_v2p3 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -634,27 +634,27 @@ body: | ; GFX6-LABEL: name: load_constant_v2s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = 
PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 @@ -675,27 +675,27 @@ body: | ; GFX6-LABEL: name: load_constant_v4s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 @@ -716,27 +716,27 @@ body: | ; GFX6-LABEL: name: load_constant_v8s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX6-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 @@ -757,27 +757,27 @@ body: | ; GFX6-LABEL: name: load_constant_v8s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX7-LABEL: name: load_constant_v8s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX8-LABEL: 
name: load_constant_v8s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] ; GFX10-LABEL: name: load_constant_v8s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[S_LOAD_DWORDX8_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1 @@ -798,27 +798,27 @@ body: | ; GFX6-LABEL: name: load_constant_v16s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v16s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v16s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX8-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v16s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -839,27 +839,27 @@ body: | ; GFX6-LABEL: name: load_constant_v8s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v8s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v8s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX8-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v8s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) - ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[PRED_COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) + ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -884,27 +884,27 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 255, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: 
[[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1020, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1020 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -927,28 +927,28 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[PRED_COPY]], 256, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1024, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1024 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -971,30 +971,30 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX6-NEXT: 
[[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], 1048575, 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048575 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1017,30 +1017,30 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY 
[[S_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[PRED_COPY]], 262144, 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048576 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1063,31 +1063,31 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX8-NEXT: 
[[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; GFX10-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[PRED_COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1073741823 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1110,51 +1110,51 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def 
$scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_MOV_B64_]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], -1, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -1177,57 +1177,57 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ 
$}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; 
GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) - ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4) - ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]], -524288, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: $sgpr0 = PRED_COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -524288 %2:sgpr(p4) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 798a019da1e11..a561801542ebe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -20,33 +20,33 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -67,33 +67,33 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_flat_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: 
(load (s16)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_flat_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX10-LABEL: name: load_flat_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX11-LABEL: name: load_flat_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -114,33 +114,33 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1 ; 
GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -161,33 +161,33 @@ body: | ; GFX7-LABEL: name: load_flat_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -208,33 +208,33 @@ body: | ; GFX7-LABEL: name: load_flat_v3s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = 
FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX11-LABEL: name: load_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -255,33 +255,33 @@ body: | ; GFX7-LABEL: name: load_flat_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = 
FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -302,33 +302,33 @@ body: | ; GFX7-LABEL: name: load_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX9-NEXT: 
$vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -349,33 +349,33 @@ body: | ; GFX7-LABEL: name: load_flat_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v2s64 ; GFX10: liveins: 
$vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -398,31 +398,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_flat_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -445,31 +445,31 @@ body: | ; GFX7-NEXT: {{ $}} ; 
GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX11-LABEL: name: load_flat_s96 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -492,31 +492,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_flat_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -537,33 +537,33 @@ body: | ; GFX7-LABEL: name: load_flat_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_p3_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -584,33 +584,33 @@ body: | ; GFX7-LABEL: name: load_flat_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: 
[[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_p1_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -633,31 +633,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_flat_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; 
GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_flat_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_flat_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX11-LABEL: name: load_flat_p999_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -680,31 +680,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_flat_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_flat_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_flat_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX11-LABEL: name: load_flat_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -725,33 +725,33 @@ body: | ; GFX7-LABEL: name: load_flat_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; 
GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX11-LABEL: name: load_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -772,33 +772,33 @@ body: | ; GFX7-LABEL: name: load_flat_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v4s16 ; GFX8: 
liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -821,31 +821,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) 
= G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX11-LABEL: name: load_flat_v6s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -866,33 +866,33 @@ body: | ; GFX7-LABEL: name: load_flat_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -917,63 +917,63 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -996,63 +996,63 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: 
(load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1075,83 +1075,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; 
GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 
+ ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = 
V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1174,83 +1174,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], 
%subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1273,63 +1273,63 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], 
[[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY 
$vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1352,83 +1352,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, 
[[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead 
%11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY 
[[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1451,83 +1451,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 
0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 ; 
GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1550,83 +1550,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 
-1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1649,83 +1649,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], 
%subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1748,83 +1748,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit 
$exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: 
[[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1847,83 +1847,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY 
[[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 
= REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1946,83 +1946,83 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; 
GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX9-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; 
GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir index 014617a3a34a1..fbfb75b0276af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -16,24 +16,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; 
GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -56,24 +56,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -99,24 +99,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -142,54 +142,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX9-NEXT: 
%notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX9-NEXT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX11-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: 
$vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -215,44 +215,44 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, 
[[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec - ; GFX10-NEXT: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY7]], [[PRED_COPY8]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY9]], [[PRED_COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -280,44 +280,44 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) 
+ ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]] ; GFX10-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec - ; GFX10-NEXT: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10-NEXT: %zext:vreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub0 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub1 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY %zext.sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY5]], [[PRED_COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec - ; GFX10-NEXT: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY7]], [[PRED_COPY8]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY9]], [[PRED_COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[PRED_COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -343,24 +343,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4096 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -383,24 +383,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -423,54 +423,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -493,24 +493,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: 
load_global_s32_from_sgpr_base_offset_2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -533,34 +533,34 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294965247 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -582,24 +582,24 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 
2047, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[PRED_COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -621,54 +621,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -691,54 +691,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: 
[[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = 
G_CONSTANT i64 4294971390 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -761,54 +761,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + 
; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -830,54 +830,54 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE1]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], [[PRED_COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY3]], [[PRED_COPY4]], implicit-def $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -897,19 +897,19 @@ body: | bb.0: ; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[DEF]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = G_IMPLICIT_DEF %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -928,15 +928,15 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_undef_vgpr ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_undef_vgpr ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_undef_vgpr ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir index cbe594247f7d9..7e3a65945b82d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -23,55 +23,55 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) 
- ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: 
[[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_s32_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -92,55 +92,55 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_2 ; GFX7-FLAT: liveins: 
$vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_global_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_global_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] ; GFX10-LABEL: name: load_global_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] ; GFX11-LABEL: name: load_global_s32_from_2 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) $vgpr0 = COPY %1 @@ -161,55 +161,55 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = 
S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY 
[[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %1 @@ -230,55 +230,55 @@ body: | ; GFX6-LABEL: name: load_global_v2s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 
[[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_v2s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: 
[[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -299,55 +299,55 @@ body: | ; GFX6-LABEL: name: load_global_v4s32 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-LABEL: name: load_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: 
name: load_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v4s32 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -370,43 +370,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_global_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY 
[[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] 
%0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -429,43 +429,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x s64>) ; GFX7-LABEL: name: load_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX7-FLAT-LABEL: name: load_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = 
GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v2s64 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -488,43 +488,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_global_v2p1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX7-FLAT-LABEL: name: load_global_v2p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_global_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_global_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_global_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 
1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_global_v2p1 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -547,43 +547,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_global_s128 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX7-FLAT-LABEL: name: load_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_global_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_global_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_global_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_global_s128 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -606,43 +606,43 @@ body: | ; GFX6-NEXT: {{ $}} ; 
GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_global_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_p3_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_p3_from_4 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: 
[[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -665,43 +665,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_global_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_p1_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 
; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_p1_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -724,43 +724,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_global_p999_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-FLAT-LABEL: name: load_global_p999_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_global_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_global_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_global_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX11-LABEL: name: load_global_p999_from_8 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -783,43 +783,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_global_v2p3 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-FLAT-LABEL: name: load_global_v2p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_global_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_global_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_global_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX11-LABEL: name: load_global_v2p3 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -842,43 +842,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: 
[[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] ; GFX11-LABEL: name: load_global_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x 
s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -901,43 +901,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY 
[[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX11-LABEL: name: load_global_v4s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -960,43 +960,43 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<8 x s16>) ; GFX7-LABEL: name: load_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX7-FLAT-LABEL: name: load_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1) + ; 
GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX11-LABEL: name: load_global_v8s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -1021,75 +1021,75 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} 
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE 
[[PRED_COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1112,85 +1112,85 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, 
implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 
2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1213,16 +1213,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, 
implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1230,20 +1230,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1251,57 +1251,57 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1324,16 +1324,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1341,20 +1341,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; 
GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1362,57 +1362,57 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = 
V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY 
[[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1435,85 +1435,85 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], 
[[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 
implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1536,107 +1536,107 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1659,16 +1659,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1676,20 +1676,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1697,67 +1697,67 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX8-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1780,16 +1780,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, 
%subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1797,20 +1797,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -1818,67 +1818,67 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX7-FLAT-NEXT: 
[[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed 
[[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1901,107 +1901,107 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY 
[[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2024,107 +2024,107 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], 
[[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2147,16 +2147,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -2164,20 +2164,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -2185,87 +2185,87 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: 
load_global_s32_from_1_gep_m8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY 
[[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2288,16 +2288,16 @@ body: | ; GFX6-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -2305,20 +2305,20 @@ body: | ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-NEXT: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 @@ -2326,87 +2326,87 @@ body: | ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; 
GFX7-FLAT-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX7-FLAT-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX8-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) - ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; 
GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX9-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, 
[[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY3]], [[PRED_COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir index da4f62b033050..e89d33bf68554 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir @@ -20,38 +20,38 @@ body: | ; GFX7-LABEL: name: load_global_v3s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -74,31 +74,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX7-FLAT-LABEL: name: load_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_global_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: 
[[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_global_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_global_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -121,31 +121,31 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX7-FLAT-LABEL: name: load_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir index ba7f1d29a0f86..85e1acc5b10c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir @@ -18,28 +18,28 @@ body: | ; GFX7-LABEL: name: 
load_local_v4s32_align16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] + ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_]] ; GFX9-LABEL: name: load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ_B128_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -60,28 +60,28 @@ body: | ; GFX7-LABEL: name: load_local_v4s32_align_8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -102,28 +102,28 @@ body: | ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ 
$}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 400 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -146,36 +146,36 @@ body: | ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; 
GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4000 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -198,28 +198,28 @@ body: | ; GFX7-LABEL: name: load_local_v2s64 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v2s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v2s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX10-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v2s64 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -243,25 +243,25 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_local_v2p1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_local_v2p1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) ; GFX11-LABEL: name: load_local_v2p1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](<2 x p1>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -285,25 +285,25 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_local_s128 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_local_s128 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; 
GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) ; GFX11-LABEL: name: load_local_s128 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[LOAD]](s128) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -324,28 +324,28 @@ body: | ; GFX7-LABEL: name: load_local_v8s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] + ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v8s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v8s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] ; GFX11-LABEL: name: load_local_v8s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir index 
ab3c8f507fdfe..b1723ac17b348 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -20,29 +20,29 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_s32_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -63,29 +63,29 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX7-LABEL: name: load_local_s32_from_2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: 
$m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: load_local_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -109,29 +109,29 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1 ; GFX10: liveins: $vgpr0 ; 
GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -152,29 +152,29 @@ body: | ; GFX6-LABEL: name: load_local_v2s32 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v2s32 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v2s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v2s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -198,26 +198,26 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX6-NEXT: $vgpr0_vgpr1 = 
PRED_COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_local_v2s32_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -238,29 +238,29 @@ body: | ; GFX6-LABEL: name: load_local_s64 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_s64 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = 
DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_s64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -284,26 +284,26 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -324,29 +324,29 @@ body: | ; GFX6-LABEL: name: load_local_p3_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ 
$}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p3_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p3_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -367,29 +367,29 @@ body: | ; GFX6-LABEL: name: load_local_p5_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p5_from_4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, 
implicit $exec :: (load (p5), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p5_from_4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -410,29 +410,29 @@ body: | ; GFX6-LABEL: name: load_local_p1_align8 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_p1_align8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; 
GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -456,26 +456,26 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_local_p1_align4 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -499,26 +499,26 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_local_p999_from_8 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_local_p999_from_8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: 
[[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_local_p999_from_8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](p999) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -542,26 +542,26 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_local_v2p3 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_local_v2p3 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_local_v2p3 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](<2 x p3>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -582,29 +582,29 @@ body: | ; GFX6-LABEL: name: load_local_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX6-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_v2s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-NEXT: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] ; GFX10-LABEL: name: load_local_v2s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -625,29 +625,29 @@ body: | ; GFX6-LABEL: name: load_local_v4s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX6-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v4s16 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-NEXT: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v4s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] ; GFX10-LABEL: name: load_local_v4s16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = 
DS_READ_B64_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -692,31 +692,31 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65535 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -739,37 +739,37 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit 
$exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX7-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX7-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 @@ -795,37 +795,37 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65536 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -848,37 +848,37 @@ body: | ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 
[[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] ; GFX10-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX10-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -1 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -906,26 +906,26 @@ body: | ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; 
GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[PRED_COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[PRED_COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1016 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -953,32 +953,32 @@ body: | ; GFX6-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6-NEXT: $m0 = S_MOV_B32 -1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] + ; GFX7-NEXT: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[V_ADD_CO_U32_e64_]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) + ; GFX7-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_]] ; GFX9-LABEL: 
name: load_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX9-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] ; GFX10-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[DS_READ2_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1020 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index f8ba02a188851..d3f14bff847b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -22,21 +22,21 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} 
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -60,21 +60,21 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_USHORT]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -98,21 +98,21 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: 
name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -136,21 +136,21 @@ body: | ; GFX6-LABEL: name: load_private_p3_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_p3_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = 
COPY %1 @@ -174,21 +174,21 @@ body: | ; GFX6-LABEL: name: load_private_p5_from_4 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_p5_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -213,21 +213,21 @@ body: | ; GFX6-LABEL: name: load_private_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -255,23 +255,23 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -297,27 +297,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec 
+ ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 @@ -346,23 +346,23 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -388,25 +388,25 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -432,25 +432,25 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} 
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -476,23 +476,23 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY 
[[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -518,27 +518,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: 
$vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -564,25 +564,25 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -608,25 +608,25 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[PRED_COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -652,27 +652,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], 
[[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -698,27 +698,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -744,27 +744,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], 
[[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -790,27 +790,27 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 
[[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -834,14 +834,14 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -863,14 +863,14 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY 
[[SCRATCH_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:sgpr(p5) = G_CONSTANT i32 16 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -892,14 +892,14 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -922,15 +922,15 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -954,13 +954,13 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_fi ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; 
GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_fi ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -983,13 +983,13 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -1015,13 +1015,13 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:sgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(s32) = COPY %1 @@ -1051,17 +1051,17 @@ body: | ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit 
$exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SVS]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_UBYTE_SVS]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -1087,15 +1087,15 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_neg1 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) - ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 -1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 8ca6f24793a10..1de3a695eb277 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -17,7 +17,7 @@ legalized: true regBankSelected: true # GCN: body: -# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 +# GCN: [[PTR:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 # Immediate offset: # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0 @@ -48,11 +48,11 @@ regBankSelected: true # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # SIVI-DAG: 
[[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -62,11 +62,11 @@ regBankSelected: true # GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 # GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -80,11 +80,11 @@ regBankSelected: true # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub0 # SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1 +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = PRED_COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = PRED_COPY [[PTR]].sub1 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 @@ -92,11 +92,11 @@ regBankSelected: true # Pointer loads # GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS0]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS0]] # GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS1]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS1]] # GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 -# GCN: $sgpr0_sgpr1 = COPY [[AS4]] +# GCN: $sgpr0_sgpr1 = PRED_COPY [[AS4]] body: | bb.0: @@ -171,8 +171,8 @@ body: | %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(p1) = COPY $sgpr2_sgpr3 - ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 + ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = PRED_COPY $sgpr0_sgpr1 + ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = PRED_COPY $sgpr2_sgpr3 ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]] %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 @@ -239,8 +239,8 @@ body: | # Test a load with a (register + immediate) offset. # GCN-LABEL: name: smrd_sgpr_imm{{$}} -# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1 -# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +# GFX9-DAG: %[[BASE:.*]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 +# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 # GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, name: smrd_sgpr_imm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir index a5041e5a5ecdd..2835b74d1351a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -17,37 +17,37 @@ body: | ; GFX6-LABEL: name: lshr_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX7-LABEL: name: lshr_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX8-LABEL: name: lshr_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX9-LABEL: name: lshr_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] ; GFX10-LABEL: name: lshr_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; 
GFX10-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ body: | ; GFX6-LABEL: name: lshr_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ body: | ; GFX6-LABEL: name: lshr_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ body: | ; GFX6-LABEL: name: lshr_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: 
S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX7-LABEL: name: lshr_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX8-LABEL: name: lshr_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX9-LABEL: name: lshr_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] ; GFX10-LABEL: name: lshr_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ body: | ; GFX6-LABEL: name: lshr_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX7-LABEL: name: lshr_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; 
GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX8-LABEL: name: lshr_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX9-LABEL: name: lshr_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] ; GFX10-LABEL: name: lshr_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ body: | ; GFX6-LABEL: name: lshr_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ body: | ; GFX6-LABEL: name: lshr_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], 
implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ body: | ; GFX6-LABEL: name: lshr_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX7-LABEL: name: lshr_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_LSHR_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHR_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHR_B64_e64_]] ; GFX8-LABEL: name: lshr_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX9-LABEL: name: lshr_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = 
V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] ; GFX10-LABEL: name: lshr_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index ddb80f440a2fe..acb0dd1a685d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -83,30 +83,30 @@ body: | ; GFX8-LABEL: name: lshr_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], 
[[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,30 +176,30 @@ body: | ; GFX8-LABEL: name: lshr_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,31 +221,31 @@ body: | ; GFX8-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], 
[[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -421,30 +421,30 @@ body: | ; GFX8-LABEL: name: lshr_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_sv ; GFX10: liveins: 
$sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX11-LABEL: name: lshr_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir index a545afb77a0a8..73f63c59dbbfe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir @@ -79,16 +79,16 @@ body: | ; GFX9-LABEL: name: lshr_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ body: | ; GFX9-LABEL: name: lshr_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, 
[[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ body: | ; GFX9-LABEL: name: lshr_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] ; GFX10-LABEL: name: lshr_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHRREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir index 698281caca245..8da21a221fb96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir @@ -13,18 +13,18 @@ body: | ; GFX10-LABEL: name: mad_u64_u32_vvv ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] ; GFX11-LABEL: name: 
mad_u64_u32_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -46,18 +46,18 @@ body: | ; GFX10-LABEL: name: mad_i64_i32_vvv ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] ; GFX11-LABEL: name: mad_i64_i32_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir index a030506f6af59..25c2d91dfc4b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 
= COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -37,9 +37,9 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s64_s_s32_v_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -60,9 +60,9 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s64_v_s32_s_s32 ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -83,9 +83,9 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s64_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -105,11 +105,11 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s96_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY 
$sgpr2 @@ -130,11 +130,11 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s96_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -155,12 +155,12 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s128_s_s32_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -182,12 +182,12 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s128_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -209,10 +209,10 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s128_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = 
COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %4:sgpr(s128) = G_MERGE_VALUES %0, %1 @@ -232,10 +232,10 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s128_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s128) = G_MERGE_VALUES %0, %1 @@ -255,13 +255,13 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s160_s_s32_s_s32_s_s32_s_s32_s_s32 ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 @@ -284,13 +284,13 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s160_v_s32_v_s32_v_s32_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3, [[COPY4]], %subreg.sub4 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_160 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3, [[PRED_COPY4]], %subreg.sub4 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -313,10 +313,10 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s192_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -338,10 +338,10 @@ body: | ; GCN-LABEL: name: test_merge_values_v_s192_v_s64_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -363,12 +363,12 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s256_s_s64_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY 
$sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -390,10 +390,10 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s256_s_s128_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3, [[PRED_COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s128) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 %2:sgpr(s256) = G_MERGE_VALUES %0, %1 @@ -413,10 +413,10 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s512_s_s256_s_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[PRED_COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(s256) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %4:sgpr(s512) = G_MERGE_VALUES %0, %1 @@ -436,16 +436,16 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s512_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64_s_s64 ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $sgpr12_sgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $sgpr14_sgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr12_sgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr14_sgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -471,16 +471,16 @@ body: | ; GCN-LABEL: name: test_merge_values_v_v512_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64_v_s64 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr6_vgpr7 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY $vgpr14_vgpr15 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7, [[COPY4]], %subreg.sub8_sub9, [[COPY5]], %subreg.sub10_sub11, [[COPY6]], %subreg.sub12_sub13, [[COPY7]], %subreg.sub14_sub15 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr8_vgpr9 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr10_vgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr12_vgpr13 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr14_vgpr15 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1, [[PRED_COPY1]], %subreg.sub2_sub3, [[PRED_COPY2]], %subreg.sub4_sub5, [[PRED_COPY3]], %subreg.sub6_sub7, [[PRED_COPY4]], %subreg.sub8_sub9, [[PRED_COPY5]], %subreg.sub10_sub11, [[PRED_COPY6]], %subreg.sub12_sub13, [[PRED_COPY7]], %subreg.sub14_sub15 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = PRED_COPY [[REG_SEQUENCE]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -506,9 +506,9 @@ body: | ; GCN-LABEL: name: test_merge_values_rc_already_set_src_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr_32(s32) = COPY $vgpr0 %1:vgpr_32(s32) = COPY $vgpr1 @@ -529,9 +529,9 @@ body: | ; GCN-LABEL: name: test_merge_values_rc_already_set_dst_v_s64_v_s32_v_s32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -552,11 +552,11 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s1024_s_s256_s_s256_s_s256_s_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[DEF]], 
%subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, [[DEF1]], %subreg.sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, %1:sgpr(s256) = G_IMPLICIT_DEF @@ -580,10 +580,10 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s1024_s_s512 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_1024 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[PRED_COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[REG_SEQUENCE]] %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s512) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %2:sgpr(s1024) = G_MERGE_VALUES %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir index c01c1a7c0d8fd..4adeba65603c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mul.mir @@ -12,9 +12,9 @@ body: | ; GCN-LABEL: name: mul_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY]], [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[PRED_COPY]], [[PRED_COPY1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MUL_I32_]] 
%0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -33,9 +33,9 @@ body: | ; GCN-LABEL: name: mul_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -54,9 +54,9 @@ body: | ; GCN-LABEL: name: mul_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -75,9 +75,9 @@ body: | ; GCN-LABEL: name: mul_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index 337014db972a7..a05ae2f135f33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -17,21 +17,21 @@ body: | ; WAVE64-LABEL: name: or_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc ; 
WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -57,16 +57,16 @@ body: | ; WAVE64-LABEL: name: or_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -89,16 +89,16 @@ body: | ; WAVE64-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: 
[[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -121,16 +121,16 @@ body: | ; WAVE64-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -153,16 +153,16 @@ body: | ; WAVE64-LABEL: name: or_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -183,16 +183,16 @@ body: | ; WAVE64-LABEL: name: or_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; 
WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -213,16 +213,16 @@ body: | ; WAVE64-LABEL: name: or_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -243,16 +243,16 @@ body: | ; WAVE64-LABEL: name: or_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -273,16 +273,16 @@ body: | ; WAVE64-LABEL: name: or_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; 
WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -303,16 +303,16 @@ body: | ; WAVE64-LABEL: name: or_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -333,16 +333,16 @@ body: | ; WAVE64-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -395,22 +395,22 @@ body: | ; WAVE64-LABEL: name: or_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] ; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] @@ -440,27 +440,27 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: 
[[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_OR_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_OR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -488,9 +488,9 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -499,15 +499,15 @@ body: | ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 
[[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_OR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir index 3ed8e5ae144c2..4e9fc9dc2449a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -17,20 +17,20 @@ body: | ; GFX8-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ body: | ; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], 
[[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,20 +88,20 @@ body: | ; GFX8-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4, implicit %3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -125,19 +125,19 @@ body: | ; GFX8-LABEL: name: add_p3_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p3_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -161,19 +161,19 @@ body: | ; GFX8-LABEL: name: add_p5_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[PRED_COPY2]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p5_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = 
V_ADD3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD3_U32_e64_]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -197,20 +197,20 @@ body: | ; GFX8-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -234,20 +234,20 @@ body: | ; GFX8-LABEL: name: add_p5_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]] ; GFX9-LABEL: name: 
add_p5_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY2]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir index 6fa8441acc5ad..533c98f28eb46 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir @@ -17,20 +17,20 @@ body: | ; GFX8-LABEL: name: and_or_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX9-LABEL: name: and_or_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ body: | ; GFX8-LABEL: name: and_or_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,19 +88,19 @@ body: | ; GFX8-LABEL: name: and_or_s32_vgpr_vgpr_vgpr_commute ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY2]], [[V_AND_B32_e64_]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY2]], [[V_AND_B32_e64_]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_vgpr_vgpr_vgpr_commute ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_AND_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -123,22 +123,22 @@ body: | ; GFX8-LABEL: name: and_or_s32_sgpr_sgpr_vgpr ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 
= COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_AND_B32_]] + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX9-LABEL: name: and_or_s32_sgpr_sgpr_vgpr ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_AND_B32_]] + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir index e3ce1278b2cad..584d6efe43193 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir @@ -17,20 +17,20 @@ body: | ; GFX8-LABEL: name: or_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_1]] ; GFX9-LABEL: name: or_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_OR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -53,19 +53,19 @@ body: | ; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]] ; GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR3_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -88,20 +88,20 @@ body: | ; GFX8-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]] ; 
GFX9-LABEL: name: or_s32_vgpr_vgpr_vgpr_multi_use ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_1]], implicit [[V_OR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir index cbb4b916b75bd..1ae22c640d19a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir @@ -13,10 +13,10 @@ body: | ; GFX6-LABEL: name: smed3_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -41,12 +41,12 @@ body: | ; GFX6-LABEL: name: smed3_s32_sss ; GFX6: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[PRED_COPY2]], implicit-def $scc ; GFX6-NEXT: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_1]] %0:sgpr(s32) = COPY $sgpr0 @@ -71,11 +71,11 @@ body: | ; GFX6-LABEL: name: 
smed3_s32_vvv_multiuse0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -99,11 +99,11 @@ body: | ; GFX6-LABEL: name: smed3_s32_vvv_multiuse1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -127,12 +127,12 @@ body: | ; GFX6-LABEL: name: smed3_s32_vvv_multiuse2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -156,12 +156,12 @@ body: | ; GFX6-LABEL: name: smed3_s32_vvv_reuse_bounds ; GFX6: liveins: $vgpr0, $vgpr1, 
$vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_I32_e64_]], implicit [[V_MED3_I32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir index e0ce71e0b0919..06d5e7aff3996 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -16,29 +16,29 @@ body: | ; GFX8-LABEL: name: smed3_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]] ; GFX9-LABEL: name: smed3_s16_vvv ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: 
S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -67,31 +67,31 @@ body: | ; GFX8-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse0 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -120,31 +120,31 @@ body: | ; GFX8-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: 
[[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -173,33 +173,33 @@ body: | ; GFX8-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] ; GFX9-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec - ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]] ; GFX11-LABEL: name: smed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ 
$}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[COPY2]], implicit $exec - ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir index acd9e1e8c716f..4749a1c41abb1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir @@ -13,10 +13,10 @@ body: | ; GFX6-LABEL: name: umed3_s32_vvv ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -41,12 +41,12 @@ body: | ; GFX6-LABEL: name: umed3_s32_sss ; GFX6: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[PRED_COPY2]], implicit-def $scc ; GFX6-NEXT: 
[[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_1]] %0:sgpr(s32) = COPY $sgpr0 @@ -71,11 +71,11 @@ body: | ; GFX6-LABEL: name: umed3_s32_vvv_multiuse0 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -99,11 +99,11 @@ body: | ; GFX6-LABEL: name: umed3_s32_vvv_multiuse1 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -127,12 +127,12 @@ body: | ; GFX6-LABEL: name: umed3_s32_vvv_multiuse2 ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX6-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, 
implicit [[V_MED3_U32_e64_]], implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -156,12 +156,12 @@ body: | ; GFX6-LABEL: name: smed3_s32_vvv_reuse_bounds ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec - ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[COPY]], [[COPY1]], [[COPY3]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec + ; GFX6-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_MED3_U32_e64_]], implicit [[V_MED3_U32_e64_1]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir index 7f4b7784942f3..1113b6139fe93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -16,29 +16,29 @@ body: | ; GFX8-LABEL: name: umed3_s16_vvv ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]] ; GFX9-LABEL: name: umed3_s16_vvv ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; 
GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -67,31 +67,31 @@ body: | ; GFX8-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse0 ; GFX11: 
liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -120,31 +120,31 @@ body: | ; GFX8-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse1 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; 
GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -173,33 +173,33 @@ body: | ; GFX8-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] ; GFX9-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec - ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, 
implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]] ; GFX11-LABEL: name: umed3_s16_vvv_multiuse2 ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[V_MIN_U16_t16_e64_]], [[COPY2]], implicit $exec - ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[V_MIN_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_t16_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX11-NEXT: [[V_MAX_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_t16_e64 [[V_MIN_U16_t16_e64_]], [[PRED_COPY2]], implicit $exec + ; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, [[PRED_COPY2]], 0, 0, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir index e79a2452dd913..5bbfce0759e4f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir @@ -17,29 +17,29 @@ body: | ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: 
[[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[COPY2]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_XOR_B32_]], [[PRED_COPY2]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -62,28 +62,28 @@ body: | ; GFX8-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX8-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX8-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_1]] ; GFX9-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX9-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GFX9-NEXT: [[V_XOR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[V_XOR_B32_e64_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_1]] ; GFX10-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -110,32 +110,32 @@ body: | ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY3]], 
[[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -160,32 +160,32 @@ body: | ; GFX8-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr_copy_commute ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_XOR_B32_]] + ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -210,28 +210,28 @@ body: | ; GFX8-LABEL: name: 
xor_s32_sgpr_sgpr_vgpr ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[COPY2]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[PRED_COPY2]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX9-LABEL: name: xor_s32_sgpr_sgpr_vgpr ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[COPY2]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_XOR_B32_]], [[PRED_COPY2]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; GFX10-LABEL: name: xor_s32_sgpr_sgpr_vgpr ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR3_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir index 44b82bd669ef6..1d707c9fd22d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -13,13 +13,13 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: 
S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -29,8 +29,8 @@ body: | ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 @@ -66,25 +66,25 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 @@ -120,13 +120,13 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -136,8 +136,8 @@ body: | ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: 
[[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 @@ -173,25 +173,25 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $vgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $vgpr0, $sgpr1 @@ -227,13 +227,13 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -243,8 +243,8 @@ body: | ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4 @@ -279,25 +279,25 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} 
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 @@ -333,21 +333,21 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -387,13 +387,13 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -403,8 +403,8 @@ body: | ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 - ; GCN-NEXT: $sgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY1]], %bb.1 + ; GCN-NEXT: $sgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 @@ -440,25 +440,25 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]] + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]] ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY4]], %bb.1 - ; GCN-NEXT: $vgpr0 = COPY [[PHI]] + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[PRED_COPY4]], %bb.1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir index ddaee358b64b8..b68e15d2ea367 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -18,66 +18,66 @@ body: | ; GFX6-LABEL: name: gep_p0_sgpr_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; 
GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; 
GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE32-NEXT: 
[[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 @@ -98,67 +98,67 @@ body: | ; GFX6-LABEL: name: gep_p0_vgpr_vgpr ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 
= PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE64-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -178,67 +178,67 @@ body: | ; GFX6-LABEL: name: gep_p0_sgpr_vgpr ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p0_sgpr_vgpr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p0_sgpr_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 
0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -258,37 +258,37 @@ body: | ; GFX6-LABEL: name: gep_p3_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p3_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; 
GFX9-LABEL: name: gep_p3_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -308,37 +308,37 @@ body: | ; GFX6-LABEL: name: gep_p3_vgpr_vgpr ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX8-LABEL: name: gep_p3_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: gep_p3_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -358,37 +358,37 @@ body: | ; GFX6-LABEL: name: gep_p3_sgpr_vgpr ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX8-LABEL: name: gep_p3_sgpr_vgpr ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] ; GFX9-LABEL: name: gep_p3_sgpr_vgpr ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 
[[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-WAVE32-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] %0:sgpr(p3) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -408,37 +408,37 @@ body: | ; GFX6-LABEL: name: gep_p6_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p6_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX9-LABEL: name: gep_p6_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: 
gep_p6_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p6_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p6) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -458,37 +458,37 @@ body: | ; GFX6-LABEL: name: gep_p2_sgpr_sgpr ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX8-LABEL: name: gep_p2_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX9-LABEL: name: gep_p2_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE64-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE64-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] ; GFX10-WAVE32-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p2) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -508,66 +508,66 @@ body: | ; GFX6-LABEL: name: gep_p999_sgpr_sgpr ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p999_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p999_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], 
implicit-def $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY4]], [[PRED_COPY5]], implicit-def $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 @@ -588,67 +588,67 @@ body: | ; GFX6-LABEL: name: gep_p999_vgpr_vgpr ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX6-NEXT: 
[[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX8-LABEL: name: gep_p999_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: gep_p999_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE64-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} - ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-WAVE32-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} - ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; 
GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; GFX10-WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-WAVE32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir index c4a983bca7376..9398cfa634307 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -13,9 +13,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -252645136 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -252645136 @@ -59,9 +59,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xffffffff ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1 @@ -82,9 +82,9 @@ body: | ; CHECK-LABEL: name: 
ptrmask_p3_s32_sgpr_sgpr_0x00000000 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 0 @@ -105,9 +105,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2147483648 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2147483648 @@ -128,9 +128,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1073741824 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1073741824 @@ -151,9 +151,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2 @@ -174,9 +174,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -4 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -4 @@ -197,9 +197,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -8 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY 
$sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -8 @@ -220,9 +220,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -16 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -16 @@ -243,9 +243,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -536870912 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -536870912 @@ -266,9 +266,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -289,10 +289,10 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xffffffffffffffff ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 @@ -313,9 +313,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[S_MOV_B64_]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 
%1:sgpr(s64) = G_CONSTANT i64 0 @@ -336,11 +336,11 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[REG_SEQUENCE]], implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 @@ -361,11 +361,11 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 @@ -386,15 +386,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi32 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4294967296 @@ -415,11 +415,11 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clear_32 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], %const, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 4294967296 @@ -440,13 +440,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo1 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -2 @@ -467,13 +467,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo2 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4 @@ -494,13 +494,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo3 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY 
[[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -8 @@ -521,13 +521,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo4 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -16 @@ -548,15 +548,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo29 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[PRED_COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: 
S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -536870912 @@ -577,9 +577,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -252645136 @@ -600,9 +600,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -2 @@ -623,9 +623,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -4 @@ -646,9 +646,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -8 @@ -669,9 +669,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -16 @@ -692,9 +692,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: 
%const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], %const, implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -536870912 @@ -715,14 +715,14 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY2]], [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY3]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -744,16 +744,16 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: 
[[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY2]], [[PRED_COPY4]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 @@ -775,15 +775,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -2 @@ -804,15 +804,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo2 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 @@ -833,15 +833,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo3 ; CHECK: liveins: $vgpr0_vgpr1 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 @@ -862,15 +862,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo4 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -16 @@ -891,15 +891,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo29 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], 
%subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[PRED_COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -536870912 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir index 144a93f3771ab..37bda1d97240f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrtoint.mir @@ -14,8 +14,8 @@ body: | ; CHECK-LABEL: name: ptrtoint_s_p3_to_s_s32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -35,8 +35,8 @@ body: | ; CHECK-LABEL: name: ptrtoint_s_p5_to_s_s32 ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p5) = COPY $sgpr0 %1:sgpr(s32) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: ptrtoint_s_p0_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -77,8 +77,8 @@ body: | ; CHECK-LABEL: name: ptrtoint_s_p1_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 @@ -98,8 +98,8 @@ body: | ; CHECK-LABEL: name: ptrtoint_s_p999_to_s_s64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_PTRTOINT %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir index 8c1567af27dc6..cb6ee0e11a9bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: return_address_already_live_in_copy ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0, implicit %1 @@ -34,9 +34,9 @@ body: | ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_life_in ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:sgpr(p0) = COPY $sgpr30_sgpr31 %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0, implicit %1 @@ -54,8 +54,8 @@ body: | ; CHECK-LABEL: name: return_address_no_live_in ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 S_ENDPGM 0, implicit %0 ... 
@@ -72,11 +72,11 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] bb.0: G_BR %bb.1 @@ -97,11 +97,11 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY]] bb.0: %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 G_BR %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir index 27a79c3506a6c..dcb64509e089b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir @@ -16,10 +16,10 @@ body: | ; CHECK-LABEL: name: sbfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec - ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2 @@ -39,10 +39,10 @@ body: | ; CHECK-LABEL: name: sbfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir index ec167b1d7c5a7..4d41d866ed8f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir @@ -13,14 +13,14 @@ body: | ; GCN-LABEL: name: select_s32_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 
= COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -44,14 +44,14 @@ body: | ; GCN-LABEL: name: select_s64_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -75,14 +75,14 @@ body: | ; GCN-LABEL: name: select_p0_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -106,14 +106,14 @@ body: | ; GCN-LABEL: name: 
select_p1_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -137,14 +137,14 @@ body: | ; GCN-LABEL: name: select_p999_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -168,14 +168,14 @@ body: | ; GCN-LABEL: name: select_v4s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = 
PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -199,14 +199,14 @@ body: | ; GCN-LABEL: name: select_s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[PRED_COPY3]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -232,14 +232,14 @@ body: | ; GCN-LABEL: name: select_v2s16_scc ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -263,12 +263,12 @@ body: | ; GCN-LABEL: name: select_s32_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -292,12 +292,12 @@ body: | ; GCN-LABEL: name: select_s16_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -323,12 +323,12 @@ body: | ; GCN-LABEL: name: select_v2s16_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -352,12 +352,12 @@ body: | ; GCN-LABEL: name: select_p3_vcc ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], 
implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -382,12 +382,12 @@ body: | ; GCN-LABEL: name: select_s32_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY3]], 1, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -412,12 +412,12 @@ body: | ; GCN-LABEL: name: select_s32_vcc_fneg_rhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[PRED_COPY3]], 0, [[PRED_COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -442,12 +442,12 @@ body: | ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec 
= V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 3, [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -474,14 +474,14 @@ body: | ; GCN-LABEL: name: select_s16_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -510,13 +510,13 @@ body: | ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 - ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[PRED_COPY2]], implicit $exec + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec + ; GCN-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -543,16 +543,16 @@ body: | ; GCN-LABEL: name: select_s32_scc_fneg_lhs ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[PRED_COPY3]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -577,16 +577,16 @@ body: | ; GCN-LABEL: name: select_s32_scc_fneg_rhs ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY4]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc + ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY3]], [[S_MOV_B32_]], implicit-def $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY2]], [[S_XOR_B32_]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir index 24faa2ce2500d..c55c631f49c2b 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir @@ -13,9 +13,9 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s32_1 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 1 $sgpr0 = COPY %1 @@ -33,9 +33,9 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s32_2 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 131072, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 131072, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 2 $sgpr0 = COPY %1 @@ -53,9 +53,9 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s32_8 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I8_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I8_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I8 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I8_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 8 $sgpr0 = COPY %1 @@ -73,9 +73,9 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s32_16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 16 $sgpr0 = COPY %1 @@ -93,9 +93,9 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s32_31 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 2031616, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 2031616, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_SEXT_INREG %0, 31 $sgpr0 = COPY %1 @@ -113,11 +113,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_1 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: 
[[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 1 $sgpr0_sgpr1 = COPY %1 @@ -135,11 +135,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_2 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 131072, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 2 $sgpr0_sgpr1 = COPY %1 @@ -157,11 +157,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_8 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 524288, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 8 $sgpr0_sgpr1 = COPY %1 @@ -179,11 +179,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_16 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 16 $sgpr0_sgpr1 = COPY %1 @@ -201,11 +201,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_31 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2031616, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 31 
$sgpr0_sgpr1 = COPY %1 @@ -224,11 +224,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_32 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 32 $sgpr0_sgpr1 = COPY %1 @@ -246,11 +246,11 @@ body: | ; GCN-LABEL: name: sext_inreg_sgpr_s64_63 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]].sub0, %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 4128768, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_SEXT_INREG %0, 63 $sgpr0_sgpr1 = COPY %1 @@ -268,9 +268,9 @@ body: | ; GCN-LABEL: name: sext_inreg_vgpr_s32_1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 1 $vgpr0 = COPY %1 @@ -288,9 +288,9 @@ body: | ; GCN-LABEL: name: sext_inreg_vgpr_s32_2 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 2, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 2, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 2 $vgpr0 = COPY %1 @@ -308,9 +308,9 @@ body: | ; GCN-LABEL: name: sext_inreg_vgpr_s32_8 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 8, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 8, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 8 $vgpr0 = COPY %1 @@ -328,9 +328,9 @@ body: | ; GCN-LABEL: name: sext_inreg_vgpr_s32_16 ; GCN: 
liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 16 $vgpr0 = COPY %1 @@ -348,9 +348,9 @@ body: | ; GCN-LABEL: name: sext_inreg_vgpr_s32_31 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 31, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 31, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_SEXT_INREG %0, 31 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir index 1056cc41cbb8b..2aad3d5a9d683 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -13,10 +13,10 @@ body: | ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_BFE_I32_]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_SEXT %1 @@ -36,9 +36,9 @@ body: | ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -57,11 +57,11 @@ body: | ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -80,9 +80,9 @@ body: | ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s32 ; 
GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[PRED_COPY]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -102,11 +102,11 @@ body: | ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -126,11 +126,11 @@ body: | ; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_SEXT %0 $sgpr0_sgpr1 = COPY %1 @@ -164,10 +164,10 @@ body: | ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_BFE_I32_e64_]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_SEXT %1 @@ -187,9 +187,9 @@ body: | ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 1, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -208,9 +208,9 @@ body: | ; GCN-LABEL: name: sext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -230,9 +230,9 @@ body: | ; GCN-LABEL: name: sext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[PRED_COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir index 6bac125c0309b..446e2b863102c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir @@ -18,21 +18,21 @@ body: | ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -52,21 +52,21 @@ body: | ; GFX6-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 
(s16), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_]] ; GFX7-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I16_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_]] ; GFX9-LABEL: name: sextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_I16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -103,23 +103,23 @@ body: | ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_I8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[PRED_COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_]] ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY 
[[DS_READ_I8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_I8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir index 9750d97fdfa16..f5280d2ce8b3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -17,37 +17,37 @@ body: | ; GFX6-LABEL: name: shl_s32_ss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX7-LABEL: name: shl_s32_ss ; GFX7: liveins: $sgpr0, $sgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX8-LABEL: name: shl_s32_ss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX9-LABEL: name: shl_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] ; GFX10-LABEL: name: shl_s32_ss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; 
GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,37 +66,37 @@ body: | ; GFX6-LABEL: name: shl_s32_sv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_sv ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -115,37 +115,37 @@ body: | ; GFX6-LABEL: name: shl_s32_vs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], 
implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_vs ; GFX7: liveins: $sgpr0, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -164,37 +164,37 @@ body: | ; GFX6-LABEL: name: shl_s32_vv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX7-LABEL: name: shl_s32_vv ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX8-LABEL: name: shl_s32_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX9-LABEL: name: shl_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] ; GFX10-LABEL: name: shl_s32_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,37 +213,37 @@ body: | ; GFX6-LABEL: name: shl_s64_ss ; GFX6: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX7-LABEL: name: shl_s64_ss ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], 
implicit-def $scc ; GFX7-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX8-LABEL: name: shl_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX9-LABEL: name: shl_s64_ss ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] ; GFX10-LABEL: name: shl_s64_ss ; GFX10: liveins: $sgpr0_sgpr1, $sgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_LSHL_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 @@ -262,37 +262,37 @@ body: | ; GFX6-LABEL: name: shl_s64_sv ; GFX6: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_sv ; GFX7: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY 
$sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_sv ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_sv ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -311,37 +311,37 @@ body: | ; GFX6-LABEL: name: shl_s64_vs ; GFX6: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_vs ; GFX7: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_vs ; GFX8: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_vs ; GFX9: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_vs ; GFX10: liveins: $sgpr0, $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -360,37 +360,37 @@ body: | ; GFX6-LABEL: name: shl_s64_vv ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX7-LABEL: name: shl_s64_vv ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[V_LSHL_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHL_B64_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX7-NEXT: S_ENDPGM 0, implicit [[V_LSHL_B64_e64_]] ; GFX8-LABEL: name: shl_s64_vv ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX9-LABEL: name: shl_s64_vv ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr2 + ; GFX9-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] ; GFX10-LABEL: name: shl_s64_vv ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index 6e8a379c89d7a..7219a83214fb0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -83,30 +83,30 @@ body: | ; GFX8-LABEL: name: shl_s16_s16_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: 
[[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -176,30 +176,30 @@ body: | ; GFX8-LABEL: name: shl_s16_s16_vv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -221,31 +221,31 @@ body: | ; GFX8-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} 
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] ; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_t16_e64_]], 0, 16, implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -421,30 +421,30 @@ body: | ; GFX8-LABEL: name: shl_s16_s16_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: 
[[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX11-LABEL: name: shl_s16_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[PRED_COPY1]], [[PRED_COPY]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHLREV_B16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir index a11fcc3b5cd5a..c4e1720df524f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir @@ -79,16 +79,16 @@ body: | ; GFX9-LABEL: name: shl_v2s16_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_sv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr0 @@ -122,16 +122,16 @@ body: | ; GFX9-LABEL: name: shl_v2s16_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_vs ; GFX10: liveins: $sgpr0, 
$vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr0 @@ -165,16 +165,16 @@ body: | ; GFX9-LABEL: name: shl_v2s16_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] ; GFX10-LABEL: name: shl_v2s16_vv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY1]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[PRED_COPY1]], 8, [[PRED_COPY]], 0, 0, 0, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PK_LSHLREV_B16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index 043d93aa814ef..0a70059e0cc91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -16,33 +16,33 @@ body: | ; WAVE64-LABEL: name: sitofp ; WAVE64: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) - ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: 
[[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64-NEXT: FLAT_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: sitofp ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX11-LABEL: name: sitofp ; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr3_vgpr4 + ; GFX11-NEXT: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -72,24 +72,24 @@ body: | ; WAVE64-LABEL: name: sitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: sitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -109,24 +109,24 @@ body: | ; WAVE64-LABEL: name: sitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; 
WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: sitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir index 76fabc29daa11..10582116457f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: smax_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ body: | ; GCN-LABEL: name: smax_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ body: | ; GCN-LABEL: name: smax_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = 
V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ body: | ; GCN-LABEL: name: smax_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir index 67a1130daf331..447a5e66890ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: smin_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ body: | ; GCN-LABEL: name: smin_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ body: | ; GCN-LABEL: name: smin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ body: | ; GCN-LABEL: name: smin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir index 20e242999e1ee..0479a6e2715db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir @@ -28,9 +28,9 @@ body: | ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -50,16 +50,16 @@ body: | ; SI-LABEL: name: smulh_s32_sv ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -79,16 +79,16 @@ body: | ; SI-LABEL: name: smulh_s32_vs ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], 
[[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -108,16 +108,16 @@ body: | ; SI-LABEL: name: smulh_s32_vv ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; SI-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] ; GFX9-LABEL: name: smulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir index 289d70be83c01..4d493769b706d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -18,15 +18,15 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr1_vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 0) @@ -163,15 +163,15 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir index 827eb13e3e4e8..19b3b4cce3d10 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -19,23 +19,23 @@ body: | ; GFX6-LABEL: name: atomic_store_local_s32_seq_cst ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 3) @@ -204,23 +204,23 @@ body: | ; GFX6-LABEL: name: atomic_store_local_s64_seq_cst ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index 98d68b47ee11a..17d2a26d5bc1c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -19,33 +19,33 @@ body: | ; GFX7-LABEL: name: store_flat_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX11-LABEL: name: store_flat_s32_to_4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s32), align 4, addrspace 0) @@ -65,33 +65,33 @@ body: | ; GFX7-LABEL: name: store_flat_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX8-LABEL: name: store_flat_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX9-LABEL: name: store_flat_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX10-LABEL: name: store_flat_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX11-LABEL: name: store_flat_s32_to_2 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s16), align 2, addrspace 0) @@ -111,33 +111,33 @@ body: | ; GFX7-LABEL: name: store_flat_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX8-LABEL: name: store_flat_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX9-LABEL: name: store_flat_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX10-LABEL: name: store_flat_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX11-LABEL: name: 
store_flat_s32_to_1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s8), align 1, addrspace 0) @@ -158,33 +158,33 @@ body: | ; GFX7-LABEL: name: store_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX8-LABEL: name: store_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX9-LABEL: name: store_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX10-LABEL: name: store_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX11-LABEL: name: store_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (s64), align 8, addrspace 0) @@ -297,33 +297,33 @@ body: | ; GFX7-LABEL: name: store_flat_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX8-LABEL: name: store_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX9-LABEL: name: store_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX10-LABEL: name: store_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX11-LABEL: name: store_flat_v2s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0) @@ -343,33 +343,33 @@ 
body: | ; GFX7-LABEL: name: store_flat_v3s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX8-LABEL: name: store_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX9-LABEL: name: store_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX10-LABEL: name: store_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX11-LABEL: name: store_flat_v3s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0) @@ -389,33 +389,33 @@ body: | ; GFX7-LABEL: name: store_flat_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - 
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX8-LABEL: name: store_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX9-LABEL: name: store_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX10-LABEL: name: store_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX11-LABEL: name: store_flat_v4s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0) @@ -436,33 +436,33 @@ body: | ; GFX7-LABEL: name: store_flat_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: 
FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX8-LABEL: name: store_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX9-LABEL: name: store_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX10-LABEL: name: store_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX11-LABEL: name: store_flat_v2s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0) @@ -483,33 +483,33 @@ body: | ; GFX7-LABEL: name: store_flat_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX8-LABEL: name: store_flat_v4s16 ; GFX8: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX9-LABEL: name: store_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX10-LABEL: name: store_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX11-LABEL: name: store_flat_v4s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0) @@ -576,33 +576,33 @@ body: | ; GFX7-LABEL: name: store_flat_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX8-LABEL: name: store_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, 
implicit $flat_scr :: (store (<8 x s16>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX9-LABEL: name: store_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX10-LABEL: name: store_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) ; GFX11-LABEL: name: store_flat_v8s16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0) @@ -623,33 +623,33 @@ body: | ; GFX7-LABEL: name: store_flat_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX8-LABEL: name: store_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX9-LABEL: name: store_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX10-LABEL: name: store_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX11-LABEL: name: store_flat_v2s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0) @@ -670,33 +670,33 @@ body: | ; GFX7-LABEL: name: store_flat_p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX8-LABEL: name: store_flat_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX9-LABEL: 
name: store_flat_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX10-LABEL: name: store_flat_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX11-LABEL: name: store_flat_p1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p1), align 8, addrspace 0) @@ -764,33 +764,33 @@ body: | ; GFX7-LABEL: name: store_flat_p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX8-LABEL: name: store_flat_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX9-LABEL: name: store_flat_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: 
FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX10-LABEL: name: store_flat_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX11-LABEL: name: store_flat_p3 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %1, %0 :: (store (p3), align 4, addrspace 0) @@ -857,33 +857,33 @@ body: | ; GFX7-LABEL: name: store_atomic_flat_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 
(s32)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX11-LABEL: name: store_atomic_flat_s32 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0) @@ -904,33 +904,33 @@ body: | ; GFX7-LABEL: name: store_atomic_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; 
GFX11-LABEL: name: store_atomic_flat_s64 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0) @@ -951,63 +951,63 @@ body: | ; GFX7-LABEL: name: store_flat_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX7-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX8-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX10-NEXT: 
[[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX11-LABEL: name: store_flat_s32_gep_2047 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir index 8b96054da9dca..8842d9fcbd928 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -21,49 +21,49 @@ body: | ; GFX6-LABEL: name: store_global_s32_to_4 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 
0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) 
%0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) @@ -83,49 +83,49 @@ body: | ; GFX6-LABEL: name: store_global_s32_to_2 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_SHORT_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_SHORT_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) 
; GFX9-LABEL: name: store_global_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s16), align 2, addrspace 1) @@ -145,49 +145,49 @@ body: | ; GFX6-LABEL: name: store_global_s32_to_1 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_BYTE_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_BYTE_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} 
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s8), align 1, addrspace 1) @@ -214,33 +214,33 @@ body: | ; GFX7-LABEL: name: store_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 
= PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX8-LABEL: name: store_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX9-LABEL: name: store_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; GFX10-LABEL: name: store_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (s64), align 8, addrspace 1) @@ -313,49 +313,49 @@ body: | ; GFX6-LABEL: name: store_global_v2s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) @@ -375,49 +375,49 @@ body: | ; GFX6-LABEL: name: store_global_v4s32 ; GFX6: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX4_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], 
[[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1) @@ -444,33 +444,33 @@ body: | ; GFX7-LABEL: name: store_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 1) @@ -497,33 +497,33 @@ body: | ; GFX7-LABEL: name: store_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) @@ -550,33 +550,33 @@ body: | ; GFX7-LABEL: name: store_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) @@ -603,33 +603,33 @@ body: | ; GFX7-LABEL: name: store_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) @@ -656,33 +656,33 @@ body: | ; GFX7-LABEL: name: store_global_p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX8-LABEL: name: store_global_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX9-LABEL: name: store_global_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) ; GFX10-LABEL: name: store_global_p1 ; 
GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p1), align 8, addrspace 1) @@ -762,33 +762,33 @@ body: | ; GFX7-LABEL: name: store_global_p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX8-LABEL: name: store_global_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX9-LABEL: name: store_global_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) ; GFX10-LABEL: name: store_global_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) @@ -867,33 +867,33 @@ body: | ; GFX7-LABEL: name: store_atomic_global_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: FLAT_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 1) @@ -920,33 +920,33 @@ 
body: | ; GFX7-LABEL: name: store_atomic_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX8-NEXT: FLAT_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr2_vgpr3 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 1) @@ -967,69 +967,69 @@ body: | ; GFX6-LABEL: name: store_global_s32_gep_2047 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = 
COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX7-FLAT-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-FLAT-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX7-FLAT-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-FLAT-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX7-FLAT-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-FLAT-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX7-FLAT-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY2]], [[PRED_COPY3]], 0, implicit $exec + ; GFX8-NEXT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY4]], [[PRED_COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX8-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: 
GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: GLOBAL_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir index 92bd082042c78..279b1fc67f4d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir @@ -22,38 +22,38 @@ body: | ; GFX7-LABEL: name: store_global_v3s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-NEXT: BUFFER_STORE_DWORDX3_ADDR64 [[PRED_COPY1]], [[PRED_COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX7-FLAT-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX8-LABEL: name: store_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 
+ ; GFX8-NEXT: FLAT_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX9-LABEL: name: store_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX9-NEXT: GLOBAL_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX10-LABEL: name: store_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr2_vgpr3_vgpr4 + ; GFX10-NEXT: GLOBAL_STORE_DWORDX3 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir index 3c9edd2b23a57..e15f49cf9de5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -23,29 +23,29 @@ body: | ; GFX6-LABEL: name: store_local_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_4 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 3) @@ -69,29 +69,29 @@ body: | ; GFX6-LABEL: name: store_local_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B16 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_2 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B16 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B16_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_2 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B16_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 3) @@ -115,29 +115,29 @@ body: | ; GFX6-LABEL: name: store_local_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; 
GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B8 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_1 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B8 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B8_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) ; GFX10-LABEL: name: store_local_s32_to_1 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B8_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 3) @@ -161,29 +161,29 @@ body: | ; GFX6-LABEL: name: store_local_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX10-LABEL: name: store_local_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 3) @@ -207,29 +207,29 @@ body: | ; GFX6-LABEL: name: store_local_p3 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX7-LABEL: name: store_local_p3 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B32 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX9-LABEL: name: store_local_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) ; GFX10-LABEL: name: store_local_p3 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: DS_WRITE_B32_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 3) @@ -333,28 +333,28 @@ body: | ; GFX7-LABEL: name: 
store_local_s64_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (s64), align 4, addrspace 3) @@ -385,28 +385,28 @@ body: | ; GFX7-LABEL: name: store_local_p1_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_p1_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (p1), align 4, addrspace 3) @@ -437,28 +437,28 @@ body: | ; GFX7-LABEL: name: store_local_v2s32_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; 
GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_v2s32_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<2 x s32>), align 4, addrspace 3) @@ -489,28 +489,28 @@ body: | ; GFX7-LABEL: name: store_local_v4s16_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_v4s16_align4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<4 x s16>), align 4, addrspace 3) @@ -534,29 +534,29 @@ body: | ; GFX6-LABEL: name: store_local_s64_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX7-LABEL: name: store_local_s64_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX9-LABEL: name: store_local_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) ; GFX10-LABEL: name: store_local_s64_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (s64), align 8, addrspace 3) @@ -580,29 +580,29 @@ body: | ; GFX6-LABEL: name: store_local_p1_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX7-LABEL: name: store_local_p1_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX9-LABEL: name: store_local_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) ; GFX10-LABEL: name: store_local_p1_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (p1), align 8, addrspace 3) @@ -626,29 +626,29 @@ body: | ; GFX6-LABEL: name: store_local_v2s32_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s32_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align8 ; GFX9: liveins: 
$vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX10-LABEL: name: store_local_v2s32_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<2 x s32>), align 8, addrspace 3) @@ -672,29 +672,29 @@ body: | ; GFX6-LABEL: name: store_local_v4s16_align8 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX6-NEXT: $m0 = S_MOV_B32 -1 - ; GFX6-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX6-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v4s16_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX7-NEXT: DS_WRITE_B64 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX10-LABEL: name: store_local_v4s16_align8 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: DS_WRITE_B64_gfx9 [[PRED_COPY1]], [[PRED_COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %0, %1 :: (store (<4 x s16>), align 8, addrspace 3) @@ -727,28 +727,28 @@ body: | ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[PRED_COPY1]], [[COPY1]], [[COPY]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1016 @@ -783,34 +783,34 @@ body: | ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr2 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX7-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7-NEXT: $m0 = S_MOV_B32 -1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7-NEXT: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX7-NEXT: DS_WRITE2_B32 [[V_ADD_CO_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX9-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX10-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec - ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PRED_COPY]].sub0 + ; GFX10-NEXT: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY1]], [[COPY]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1020 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir index d9d63acf9bb92..5b0aa67d069b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -23,21 +23,21 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -62,21 +62,21 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: 
BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -101,21 +101,21 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -140,21 +140,21 @@ body: | ; GFX6-LABEL: name: function_store_private_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 
(<2 x s16>), addrspace 5) ; GFX9-LABEL: name: function_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX11-LABEL: name: function_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -179,21 +179,21 @@ body: | ; GFX6-LABEL: name: function_store_private_p3 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: function_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX11-LABEL: name: function_store_private_p3 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -218,21 +218,21 @@ body: | ; GFX6-LABEL: name: function_store_private_p5 ; GFX6: liveins: $vgpr0, $vgpr1 ; 
GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: function_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX11-LABEL: name: function_store_private_p5 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -355,21 +355,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_s32_to_4 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_4 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: 
SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -393,21 +393,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_s32_to_2 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_2 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -431,21 +431,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_s32_to_1 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: 
BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_s32_to_1 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -470,21 +470,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_v2s16 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -508,21 +508,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_p3 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_p3 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -546,21 +546,21 @@ body: | ; GFX6-LABEL: name: kernel_store_private_p5 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[PRED_COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX11-LABEL: name: kernel_store_private_p5 ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit 
$flat_scr :: (store (p5), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -701,19 +701,19 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -738,19 +738,19 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[PRED_COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4095 @@ -777,23 +777,23 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6-NEXT: %3:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], %3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], %3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:vgpr(s32) = G_CONSTANT i32 4095 @@ -819,30 +819,30 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: 
BUFFER_STORE_DWORD_OFFEN [[COPY]], %4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX6-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], %4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[PRED_COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec - ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[PRED_COPY1]], 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[PRED_COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4096 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir index 7e647b0bac37a..be7ee36f211e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -19,24 +19,24 @@ body: | ; GFX6-LABEL: name: sub_s32 ; GFX6: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY2]], 
[[S_SUB_I32_]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec - ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %9 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], [[V_SUB_CO_U32_e64_]], 0, implicit $exec + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[V_SUB_CO_U32_e64_2]], [[PRED_COPY2]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_4]] ; GFX9-LABEL: name: sub_s32 ; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[PRED_COPY2]], [[S_SUB_I32_]], 0, implicit $exec ; GFX9-NEXT: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec - ; GFX9-NEXT: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9-NEXT: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[PRED_COPY2]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir index d0258bdb0cd44..9f9900872a948 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir @@ -13,8 +13,8 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s32_to_s1 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -31,8 +31,8 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s32_to_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] 
%0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -49,9 +49,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s64_to_s32 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -68,9 +68,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s64_to_s16 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -87,9 +87,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s64_to_s1 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -106,9 +106,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s96_to_s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -125,9 +125,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s96_to_s64 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s64) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -144,9 +144,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s128_to_s16 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s16) = G_TRUNC %0 S_ENDPGM 
0, implicit %1 @@ -163,9 +163,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s128_to_s96 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -182,9 +182,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s256_to_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_128 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s128) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -201,9 +201,9 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s512_to_s256 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_512 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:sgpr(s512) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s256) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -220,8 +220,8 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s32_to_s1 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -238,8 +238,8 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s32_to_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -256,9 +256,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s64_to_s32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = 
PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -275,9 +275,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s64_to_s16 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -294,9 +294,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s64_to_s1 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s1) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -313,9 +313,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s96_to_s16 ; GCN: liveins: $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -332,9 +332,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s96_to_s64 ; GCN: liveins: $vgpr0_vgpr1_vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96 = PRED_COPY $vgpr0_vgpr1_vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s64) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -351,9 +351,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s128_to_s16 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s16) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -370,9 +370,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s128_to_s96 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY [[COPY]].sub0_sub1_sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vreg_96 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s96) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -389,9 +389,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s256_to_s128 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s128) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -408,9 +408,9 @@ body: | ; GCN-LABEL: name: trunc_vgpr_s512_to_s256 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_512 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_256 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %0:vgpr(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:vgpr(s256) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -428,10 +428,10 @@ body: | ; GCN-LABEL: name: trunc_sgpr_s32_to_s1_use ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: $scc = COPY [[COPY]] - ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: $scc = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir index 4858d0274a1ec..9ce3292c2fe76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir @@ -17,34 +17,34 @@ body: | ; GFX6-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 
= PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX8-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] ; GFX11-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 16, implicit-def $scc ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 @@ -64,31 +64,31 @@ body: | ; GFX6-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[PRED_COPY]].sub1 + ; GFX6-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[PRED_COPY2]], implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; GFX8-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[COPY1]](tied-def 0) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX8-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[PRED_COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[PRED_COPY1]](tied-def 0) ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_sdwa]] ; GFX11-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[PRED_COPY2]], implicit $exec ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX11-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir index 6f34ad2c0fb7a..1415937abb414 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir @@ -16,12 +16,12 @@ body: | ; GFX10-LABEL: name: uadde_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec @@ -50,12 +50,12 @@ body: | ; GFX10-LABEL: name: uadde_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir index 20466f45ffe26..0d697def6acf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir @@ -17,32 +17,32 @@ body: | ; WAVE64-LABEL: name: uadde_s32_s1_sss ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; 
WAVE64-NEXT: $scc = COPY [[COPY3]] - ; WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY4]] - ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] ; WAVE32-LABEL: name: uadde_s32_s1_sss ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY3]] - ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY4]] - ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,24 +66,24 @@ body: | ; WAVE64-LABEL: name: uadde_s32_s1_vvv ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit 
$exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: uadde_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir index eac900c71dfce..8f3590d098f0f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir @@ -17,42 +17,42 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_sss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX6-NEXT: $scc = COPY [[COPY2]] - ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX6-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 
[[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX8-LABEL: name: uaddo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: $scc = COPY [[COPY2]] - ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX9-LABEL: name: uaddo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX9-NEXT: $scc = COPY [[COPY2]] - ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX9-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] ; GFX10-LABEL: name: uaddo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX10-NEXT: $scc = COPY [[COPY2]] - ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX10-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,34 +73,34 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: uaddo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: uaddo_s32_s1_vvv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: uaddo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -121,9 +121,9 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vsv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -131,9 +131,9 @@ body: | ; GFX8-LABEL: name: uaddo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -141,9 +141,9 @@ body: | ; GFX9-LABEL: name: uaddo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -151,9 +151,9 @@ body: | ; GFX10-LABEL: name: uaddo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -179,9 +179,9 @@ body: | ; GFX6-LABEL: name: uaddo_s32_s1_vvs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -189,9 +189,9 @@ body: | ; GFX8-LABEL: name: uaddo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -199,9 +199,9 @@ body: | ; GFX9-LABEL: name: uaddo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, 
implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec @@ -209,9 +209,9 @@ body: | ; GFX10-LABEL: name: uaddo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_ADD_CO_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir index bb0830dffbdfe..1bddd57974d4f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir @@ -30,10 +30,10 @@ body: | ; CHECK-LABEL: name: ubfx_s32_vii ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec - ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2 @@ -67,10 +67,10 @@ body: | ; CHECK-LABEL: name: ubfx_s32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]], implicit $exec ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir index a9419b94ff6c4..7d0e7501daf42 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -16,21 +16,21 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s32_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s32_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -49,21 +49,21 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s32_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s32_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[PRED_COPY]], 0, 0, implicit 
$mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F32_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -82,24 +82,24 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s16_vv ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -119,24 +119,24 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE64-NEXT: $vgpr0 = COPY %1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE64-NEXT: $vgpr0 = PRED_COPY 
[[V_CVT_F16_F32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; WAVE32-NEXT: $vgpr0 = COPY %1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; WAVE32-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_e64_]] ; GFX11-LABEL: name: uitofp_s32_to_s16_vs ; GFX11: liveins: $sgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY %1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[PRED_COPY]], implicit $mode, implicit $exec + ; GFX11-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[V_CVT_F16_F32_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 %2:vgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir index 44004004c1df2..6a6da93f4fc91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: umax_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MAX_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ body: | ; GCN-LABEL: name: umax_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ body: | ; GCN-LABEL: name: umax_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr0 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ body: | ; GCN-LABEL: name: umax_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAX_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir index d206860edbcc7..6b3c5a082bc98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir @@ -14,9 +14,9 @@ body: | ; GCN-LABEL: name: umin_s32_ss ; GCN: liveins: $sgpr0, $sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GCN-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MIN_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -35,9 +35,9 @@ body: | ; GCN-LABEL: name: umin_s32_sv ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -56,9 +56,9 @@ body: | ; GCN-LABEL: name: umin_s32_vs ; GCN: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -77,9 +77,9 @@ body: | ; GCN-LABEL: name: umin_s32_vv ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MIN_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir index 9922487773769..656c3fda3fde2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir @@ -28,9 +28,9 @@ body: | ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_MUL_HI_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -50,16 +50,16 @@ body: | ; SI-LABEL: name: umulh_s32_sv ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -79,16 +79,16 @@ body: | ; SI-LABEL: name: umulh_s32_vs ; SI: liveins: $sgpr0, $vgpr0 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = 
V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -108,16 +108,16 @@ body: | ; SI-LABEL: name: umulh_s32_vv ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; SI-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] ; GFX9-LABEL: name: umulh_s32_vv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 440e475eedc4b..bb2e8493d1d27 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -14,10 +14,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -36,10 +36,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32), %2:sgpr(s32) = 
G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -58,10 +58,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_v_s32_s_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32), %2:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -80,10 +80,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s32_v_s32_s_s64 ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -102,11 +102,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s32_s_s96 ; GCN: liveins: $sgpr0_sgpr1_sgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub2 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -125,12 +125,12 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s32_s_s32_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY 
[[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]].sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY4]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32), %2:sgpr(s32), %3:sgpr(s32), %4:sgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 @@ -149,10 +149,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub2_sub3 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[PRED_COPY]].sub2_sub3 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64), %2:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -172,10 +172,10 @@ body: | ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub2_sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -194,10 +194,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_rc_set_def_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr_32(s32), %2:vgpr_32(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -216,10 +216,10 @@ body: | ; GCN-LABEL: name: test_unmerge_values_rc_set_use_v_s32_v_s32_v_s64 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY2]] %0:vreg_64(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32), %2:vgpr(s32) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -239,11 +239,11 @@ body: | ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_1024 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_256 = COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_256 = PRED_COPY [[DEF]].sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]], implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:sgpr(s1024) = G_IMPLICIT_DEF %1:sgpr(s256), %2:sgpr(s256), %3:sgpr(s256), %4:sgpr(s256) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4 @@ -268,11 +268,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s512_s_s1024 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_512 = COPY [[COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_512 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[COPY1]] - ; GCN-NEXT: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_1024 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_512 = PRED_COPY [[PRED_COPY]].sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_512 = PRED_COPY [[PRED_COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = PRED_COPY 
[[PRED_COPY1]] + ; GCN-NEXT: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = PRED_COPY [[PRED_COPY2]] %0:sgpr(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s512), %2:sgpr(s512) = G_UNMERGE_VALUES %0 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 @@ -292,19 +292,19 @@ body: | ; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32 ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]] - ; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]] - ; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]] - ; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr0_sgpr1_sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr3_sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr6_sgpr7_sgpr8 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_96 = PRED_COPY $sgpr9_sgpr10_sgpr11 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2, [[PRED_COPY1]], %subreg.sub3_sub4_sub5, [[PRED_COPY2]], %subreg.sub6_sub7_sub8, [[PRED_COPY3]], %subreg.sub9_sub10_sub11 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_96 = PRED_COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = PRED_COPY [[PRED_COPY7]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5 %2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8 @@ -330,17 +330,17 @@ body: | ; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32 ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], 
%subreg.sub6_sub7_sub8_sub9_sub10_sub11 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_192 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_192 = PRED_COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[PRED_COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr3_vgpr4_vgpr5 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr6_vgpr7_vgpr8 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr9_vgpr10_vgpr11 = PRED_COPY [[PRED_COPY5]] %0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:vgpr(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 %2:vgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir index 38b96edac38db..aa069101889ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir @@ -16,12 +16,12 @@ body: | ; GFX10-LABEL: name: usube_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec @@ 
-50,12 +50,12 @@ body: | ; GFX10-LABEL: name: usube_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir index 015c134d19917..c3bcb2093559a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir @@ -17,32 +17,32 @@ body: | ; WAVE64-LABEL: name: usube_s32_s1_sss ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY3]] - ; WAVE64-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: $scc = COPY [[COPY4]] - ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE64-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE64-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE64-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE64-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] ; WAVE32-LABEL: name: usube_s32_s1_sss ; WAVE32: liveins: 
$sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY3]] - ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: $scc = COPY [[COPY4]] - ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; WAVE32-NEXT: S_CMP_EQ_U32 [[PRED_COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY3]] + ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc, implicit $scc + ; WAVE32-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; WAVE32-NEXT: $scc = PRED_COPY [[PRED_COPY4]] + ; WAVE32-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -66,24 +66,24 @@ body: | ; WAVE64-LABEL: name: usube_s32_s1_vvv ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: usube_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec - ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir index 6362dbd0bab8f..90201fdb4fdcf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir @@ -17,42 +17,42 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_sss ; GFX6: liveins: $sgpr0, $sgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX6-NEXT: $scc = COPY [[COPY2]] - ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX6-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX6-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX6-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX8-LABEL: name: usubo_s32_s1_sss ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX8-NEXT: $scc = COPY [[COPY2]] - ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX8-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX8-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX8-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 
[[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX9-LABEL: name: usubo_s32_s1_sss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX9-NEXT: $scc = COPY [[COPY2]] - ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX9-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX9-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX9-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] ; GFX10-LABEL: name: usubo_s32_s1_sss ; GFX10: liveins: $sgpr0, $sgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; GFX10-NEXT: $scc = COPY [[COPY2]] - ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; GFX10-NEXT: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; GFX10-NEXT: $scc = PRED_COPY [[PRED_COPY2]] + ; GFX10-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit $scc ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_SUB_U32_]], implicit [[S_CSELECT_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -73,34 +73,34 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vvv ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX8-LABEL: name: usubo_s32_s1_vvv ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, 
[[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX9-LABEL: name: usubo_s32_s1_vvv ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] ; GFX10-LABEL: name: usubo_s32_s1_vvv ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], [[V_SUB_CO_U32_e64_1]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -121,9 +121,9 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vsv ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 
[[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -131,9 +131,9 @@ body: | ; GFX8-LABEL: name: usubo_s32_s1_vsv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -141,9 +141,9 @@ body: | ; GFX9-LABEL: name: usubo_s32_s1_vsv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -151,9 +151,9 @@ body: | ; GFX10-LABEL: name: usubo_s32_s1_vsv ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -179,9 +179,9 @@ body: | ; GFX6-LABEL: name: usubo_s32_s1_vvs ; GFX6: liveins: $sgpr0, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 
= COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX6-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -189,9 +189,9 @@ body: | ; GFX8-LABEL: name: usubo_s32_s1_vvs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX8-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX8-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -199,9 +199,9 @@ body: | ; GFX9-LABEL: name: usubo_s32_s1_vvs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX9-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec @@ -209,9 +209,9 @@ body: | ; GFX10-LABEL: name: usubo_s32_s1_vvs ; GFX10: liveins: $sgpr0, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GFX10-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUB_CO_U32_e64 [[PRED_COPY]], [[PRED_COPY1]], 0, implicit $exec ; 
GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[V_MOV_B32_e32_]], [[V_SUB_CO_U32_e64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index 91c45ac0201ab..1ece487bf6a59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -17,21 +17,21 @@ body: | ; WAVE64-LABEL: name: xor_s1_vcc_vcc_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[PRED_COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -58,16 +58,16 @@ body: | ; WAVE64-LABEL: name: xor_s1_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: 
S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -90,16 +90,16 @@ body: | ; WAVE64-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -122,16 +122,16 @@ body: | ; WAVE64-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -154,16 +154,16 @@ body: | ; WAVE64-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -184,16 +184,16 @@ body: | ; WAVE64-LABEL: name: xor_s64_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -214,16 +214,16 @@ body: | ; WAVE64-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1 + ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 
= PRED_COPY $sgpr1 + ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -244,16 +244,16 @@ body: | ; WAVE64-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -274,16 +274,16 @@ body: | ; WAVE64-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr2_sgpr3 + ; WAVE32-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[PRED_COPY]], [[PRED_COPY1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -304,16 +304,16 @@ body: | ; WAVE64-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -334,16 +334,16 @@ body: | ; WAVE64-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[PRED_COPY]], [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -396,22 +396,22 @@ body: | ; WAVE64-LABEL: name: xor_s1_vcc_copy_to_vcc ; WAVE64: liveins: $vgpr0, $vgpr1 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] ; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY1]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] @@ -441,27 +441,27 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]] - ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_XOR_B64_]] + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32_xm0 = PRED_COPY [[S_XOR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY 
$vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 @@ -489,9 +489,9 @@ body: | ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64-NEXT: {{ $}} - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE64-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec @@ -500,15 +500,15 @@ body: | ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; WAVE32-NEXT: %sgpr0:sreg_32 = PRED_COPY $sgpr0 + ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]] - ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[S_XOR_B32_]] + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PRED_COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) %sgpr0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir index 86ac8f59d483a..4d8a94dff009d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -13,10 +13,10 @@ body: | ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s16 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]] - ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s16) = G_ZEXT %1 @@ -36,9 +36,9 @@ body: | ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN-NEXT: 
$sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -57,11 +57,11 @@ body: | ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -80,9 +80,9 @@ body: | ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[PRED_COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -102,11 +102,11 @@ body: | ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -126,11 +126,11 @@ body: | ; GCN-LABEL: name: zext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 2097152, implicit-def $scc - ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: $sgpr0_sgpr1 = PRED_COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_ZEXT %0 $sgpr0_sgpr1 = COPY %1 @@ -164,10 +164,10 @@ body: | ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s16) = G_ZEXT %1 @@ -187,9 +187,9 @@ body: | ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_AND_B32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -208,9 +208,9 @@ body: | ; GCN-LABEL: name: zext_vgpr_s16_to_vgpr_s32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[PRED_COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -230,9 +230,9 @@ body: | ; GCN-LABEL: name: zext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = PRED_COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir index 63e5d061f8c37..b6e67df332571 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir @@ -18,21 +18,21 @@ body: | ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -52,21 +52,21 @@ body: | ; GFX6-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX7-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[PRED_COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: zextload_local_s32_from_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[PRED_COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U16_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -103,23 +103,23 @@ body: | ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX6-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_ADD_CO_U32_e64 [[PRED_COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_ADD_CO_U32_e64_]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX7: liveins: $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) - ; GFX7-NEXT: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[PRED_COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1_offset4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) - ; GFX9-NEXT: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[PRED_COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[DS_READ_U8_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p3) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll index 6487cba23663d..de0ffd4afbc66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll @@ -8,8 +8,8 @@ define amdgpu_ps void @test_sendmsg(i32 inreg %m0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 41ac2b984ef10..adb60ae39773a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -7,12 +7,12 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -21,12 +21,12 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -41,12 +41,12 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -55,12 +55,12 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; 
LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -75,12 +75,12 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -89,12 +89,12 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -109,12 +109,12 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -123,12 +123,12 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -143,12 +143,12 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -157,12 +157,12 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -177,12 +177,12 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -191,12 +191,12 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -211,12 +211,12 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = 
PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -224,12 +224,12 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -243,12 +243,12 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -256,12 +256,12 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; 
LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -275,12 +275,12 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -288,12 +288,12 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -307,12 +307,12 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -320,12 +320,12 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -339,12 +339,12 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -352,12 +352,12 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY 
$sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -371,12 +371,12 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -384,12 +384,12 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -403,12 +403,12 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; HSA-VI: 
bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -416,12 +416,12 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -435,12 +435,12 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -448,12 +448,12 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture 
%out, <3 ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -467,12 +467,12 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -480,12 +480,12 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, 
align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -499,12 +499,12 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -512,12 +512,12 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -531,12 +531,12 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) 
:: (store (<4 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -544,12 +544,12 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -563,12 +563,12 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -576,12 +576,12 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x 
s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -595,12 +595,12 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -608,12 +608,12 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -627,12 +627,12 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: 
(dereferenceable invariant load (<4 x s32>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -640,12 +640,12 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -659,12 +659,12 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -672,12 +672,12 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -691,12 +691,12 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -704,12 +704,12 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -723,12 +723,12 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -736,12 +736,12 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -755,12 +755,12 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -768,12 +768,12 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -787,12 +787,12 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -800,12 +800,12 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -819,12 +819,12 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 
4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -832,12 +832,12 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -851,12 +851,12 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -864,12 +864,12 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -883,12 +883,12 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -896,12 +896,12 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -915,12 +915,12 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -928,12 +928,12 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -946,12 +946,12 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -959,12 +959,12 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; 
LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -978,12 +978,12 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -991,12 +991,12 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1009,12 +1009,12 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; 
HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1023,12 +1023,12 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1043,12 +1043,12 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1057,12 +1057,12 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1077,12 +1077,12 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1091,12 +1091,12 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1111,12 +1111,12 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; 
HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) ; HSA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1125,12 +1125,12 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) @@ -1147,9 +1147,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1158,9 +1158,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1174,9 +1174,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1185,9 +1185,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1209,59 +1209,59 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 24 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) + ; HSA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; HSA-VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[PRED_COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], 
[[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[PRED_COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 @@ -1280,61 +1280,61 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; HSA-VI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) + ; HSA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; HSA-VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; HSA-VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[PRED_COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[PRED_COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[PRED_COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable 
invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p1) = PRED_COPY [[C5]](p1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[PRED_COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[PRED_COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[PRED_COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 @@ -1355,49 +1355,49 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: 
(dereferenceable invariant load (s64), align 4, addrspace 4) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) + ; HSA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C4]](p1) ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.1): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; 
LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p1) = PRED_COPY [[C4]](p1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[PRED_COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 @@ -1415,13 +1415,13 @@ define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg ; LEGACY-MESA-VI: bb.1.entry: ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: ret void @@ -1433,12 +1433,12 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1447,12 +1447,12 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD 
[[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1468,12 +1468,12 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1482,12 +1482,12 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1503,14 +1503,14 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; 
HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1520,14 +1520,14 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1544,14 +1544,14 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], 
[[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) @@ -1561,14 +1561,14 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) @@ -1586,14 +1586,14 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: 
(dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1603,14 +1603,14 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1627,14 +1627,14 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) @@ -1644,14 +1644,14 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; LEGACY-MESA-VI: bb.1 (%ir-block.1): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY 
$sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) @@ -1670,12 +1670,12 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1684,12 +1684,12 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], 
[[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1704,12 +1704,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1718,12 +1718,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1738,12 +1738,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1752,12 +1752,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1772,12 +1772,12 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1786,12 +1786,12 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1807,12 +1807,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1821,12 +1821,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) @@ -1841,16 +1841,16 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; 
HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) @@ -1862,16 +1862,16 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) @@ -1892,9 +1892,9 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY 
$sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1903,9 +1903,9 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1920,9 +1920,9 @@ define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) @@ -1931,9 +1931,9 @@ define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) @@ -1947,7 +1947,7 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; 
HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) @@ -1956,7 +1956,7 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) @@ -1970,9 +1970,9 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) @@ -1981,9 +1981,9 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) @@ -1997,9 +1997,9 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) @@ -2008,9 +2008,9 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; 
LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) @@ -2024,12 +2024,12 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 ; HSA-VI-NEXT: {{ $}} - ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) @@ -2041,12 +2041,12 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: {{ $}} - ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll index 2da014d79a0f2..1805d364445b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -7,10 +7,10 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -22,11 +22,11 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -38,8 +38,8 @@ define amdgpu_ps float @vgpr_return(i32 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %cast = bitcast i32 %vgpr to float ret float %cast @@ -50,9 +50,9 @@ define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 %vgpr } @@ -62,14 +62,14 @@ define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret i64 %vgpr } @@ -79,14 +79,14 @@ define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ret <2 x i32> %vgpr } @@ -96,13 +96,13 @@ define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0 %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 7445bb3b2a3b2..067ceb3e48f20 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -6,9 +6,9 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -19,9 +19,9 @@ define amdgpu_vs void @test_f32(float %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -32,9 +32,9 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 @@ -46,13 +46,13 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg1, addrspace 4) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, 
[[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 @@ -64,11 +64,11 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[PRED_COPY2]](s32), [[PRED_COPY]](s32), [[PRED_COPY3]](s32), [[PRED_COPY1]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 ret void @@ -79,13 +79,13 @@ define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) { ; CHECK: bb.1.main_body: ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY1]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 main_body: %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 @@ -98,7 +98,7 @@ define amdgpu_vs i32 @non_void_ret() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ret i32 0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index aa19fc8b26fca..c28668a155d31 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -7,9 +7,9 @@ define void 
@arg_align_8(i8 addrspace(1)* align 8 %arg0) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -25,42 +25,42 @@ define void @call_result_align_1() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY 
[[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -74,42 +74,42 @@ define void @call_result_align_8() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; 
CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -124,42 +124,42 @@ define void @declaration_result_align_8() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -174,36 +174,36 @@ define i8 addrspace(1)* @tail_call_assert_align() { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) 
= COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %call = tail call i8 addrspace(1)* @returns_ptr_align8() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 5206284cf5723..c00ed4580a30a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -6,10 +6,10 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) - ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[PRED_COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) + ; CHECK-NEXT: $vgpr0 = 
PRED_COPY [[ATOMICRMW_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval @@ -21,10 +21,10 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.atomicrmw.start: @@ -33,7 +33,7 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[PRED_COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 @@ -43,7 +43,7 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2 ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PHI2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 545580f33c74f..cf2666e22fcbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -9,37 +9,36 @@ declare hidden void @extern() define amdgpu_kernel void @kernel_call_no_workitem_ids() { ; CHECK-LABEL: name: kernel_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 
= COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY7]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY8]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY9]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY10]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -50,41 +49,40 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() { define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; 
CHECK-LABEL: name: kernel_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY7]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY9]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY11]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY 
[[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY8]](s64) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -95,32 +93,32 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { define amdgpu_kernel void @kernel_call_no_other_sgprs() { ; CHECK-LABEL: name: kernel_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr6_sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY3]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY4]], [[C]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY5]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY8]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -133,34 +131,34 @@ define void @func_call_no_workitem_ids() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: 
[[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -173,28 +171,28 @@ define void @func_call_no_workgroup_ids() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY4]](p4) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY12]](<4 x s32>) + ; CHECK-NEXT: 
$sgpr4_sgpr5 = PRED_COPY [[PRED_COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY8]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY10]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY11]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -207,19 +205,19 @@ define void @func_call_no_other_sgprs() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY6]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY3]](p4) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY5]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index cfc2fc0dc439a..67f3ea40630ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -11,105 +11,103 @@ declare hidden void @external_void_func_v32i32(<32 x i32>) #0 define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; 
GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; 
GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -122,39 +120,39 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900: bb.1 (%ir-block.0): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: 
$sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -162,39 +160,39 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -207,177 +205,175 @@ define void @test_func_call_external_void_func_i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, 
$vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX900-NEXT: 
[[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x 
s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX900-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX900-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX900-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX900-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX900-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX900-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX900-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX900-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX900-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX900-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX900-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX900-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX900-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX900-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX900-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX900-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX900-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX900-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX900-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX900-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX900-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX900-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX900-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX900-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX900-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX900-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX900-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX900-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX900-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX900-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; 
GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; 
GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX908-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX908-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX908-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX908-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX908-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX908-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX908-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX908-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX908-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX908-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX908-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX908-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX908-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX908-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX908-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX908-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX908-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX908-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX908-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX908-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX908-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX908-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX908-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX908-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX908-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX908-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX908-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX908-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX908-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX908-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, 
implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -390,126 +386,126 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900: bb.1 (%ir-block.1): ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX900-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr4 + ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX900-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX900-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX900-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX900-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX900-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX900-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GFX900-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) ; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX900-NEXT: 
[[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX900-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY24]](s32) ; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GFX900-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY25]](s32) ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX900-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY28:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX900-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX900-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX900-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX900-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX900-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX900-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX900-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX900-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX900-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX900-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX900-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX900-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX900-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX900-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX900-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX900-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX900-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX900-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX900-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX900-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX900-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX900-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX900-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX900-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX900-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX900-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX900-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX900-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX900-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX900-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX900-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX900-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX900-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX900-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX900-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX900-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX900-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX900-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX900-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX900-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX900-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX900-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX900-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX900-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX900-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX900-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX900-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX900-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX900-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX900-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX900-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; 
GFX900-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX900-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX900-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX900-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX900-NEXT: [[PRED_COPY35:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY35]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY26]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY27]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY28]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY29]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY30]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY31]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY32]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY33]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY34]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: SI_RETURN @@ -517,126 +513,126 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908: bb.1 (%ir-block.1): ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GFX908-NEXT: 
[[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX908-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX908-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; 
GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX908-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX908-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX908-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GFX908-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) ; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX908-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY24]](s32) ; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GFX908-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY25]](s32) ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GFX908-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY28:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: 
[[PRED_COPY29:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX908-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX908-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX908-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX908-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX908-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX908-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GFX908-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GFX908-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GFX908-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GFX908-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GFX908-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GFX908-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GFX908-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; GFX908-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; GFX908-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; GFX908-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; GFX908-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; GFX908-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; GFX908-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; GFX908-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; GFX908-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; GFX908-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; GFX908-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; GFX908-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; GFX908-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; 
GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX908-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX908-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX908-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GFX908-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GFX908-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GFX908-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GFX908-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GFX908-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GFX908-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GFX908-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GFX908-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GFX908-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GFX908-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GFX908-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GFX908-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; GFX908-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; GFX908-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; GFX908-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; GFX908-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; GFX908-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; GFX908-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; GFX908-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; GFX908-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; GFX908-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; GFX908-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; GFX908-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; GFX908-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; GFX908-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; GFX908-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; GFX908-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; GFX908-NEXT: [[PRED_COPY35:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY35]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY26]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY27]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY28]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY29]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY30]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY31]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY32]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY33]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY34]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: SI_RETURN @@ -647,83 +643,81 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 { ; GFX900-LABEL: name: test_only_workitem_id_x ; GFX900: bb.1 
(%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x 
s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY13]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_x ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY13]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -734,91 +728,89 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 { ; GFX900-LABEL: name: test_only_workitem_id_y ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = 
PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit 
$sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_y ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -829,91 +821,89 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 { ; GFX900-LABEL: name: test_only_workitem_id_z ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; 
GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_z ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; 
GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4) + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY8]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY 
[[C]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY14]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY7]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY9]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY10]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -924,93 +914,91 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 { ; GFX900-LABEL: name: test_only_workitem_id_xy ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY14]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_xy ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C2]](s32) + 
; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY14]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1021,101 +1009,99 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size ! define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 { ; GFX900-LABEL: name: test_only_workitem_id_yz ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; 
GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY14]], [[C3]](s32) ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C4]](s32) ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_yz ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY14]], [[C3]](s32) ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32) + ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C4]](s32) ; GFX908-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 @@ -1126,93 +1112,91 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size ! 
define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 { ; GFX900-LABEL: name: test_only_workitem_id_xz ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX900-NEXT: {{ $}} - ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX900-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX900-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX900-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX900-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX900-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX900-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX900-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX900-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX900-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX900-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX900-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX900-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX900-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = 
COPY $private_rsrc_reg - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX900-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C2]](s32) + ; GFX900-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY14]], [[SHL]] + ; GFX900-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX900-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX900-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX900-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX900-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX900-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX900-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_xz ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = 
COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY9]], [[C1]](s64) + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GFX908-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32) - ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]] - ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) + ; GFX908-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C2]](s32) + ; GFX908-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY14]], [[SHL]] + ; GFX908-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY10]](s64) + ; GFX908-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY11]](s32) + ; GFX908-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY12]](s32) + ; GFX908-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY13]](s32) + ; GFX908-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GFX908-NEXT: $vgpr31 = PRED_COPY [[OR]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 4103b8055e388..b813ee3a654f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -13,8 +13,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -27,13 +27,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -46,13 +46,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -73,10 +73,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; 
CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -99,10 +99,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 4ded7eba7ab15..015716a95ec95 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -70,58 +70,57 @@ declare hidden i32 @external_gfx_i32_func_i32(i32) #0 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: name: test_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 
= COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out @@ -133,19 +132,19 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 - ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 
%val, i32 addrspace(1)* %out @@ -155,54 +154,53 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -217,11 +215,11 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY1]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc 
; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: SI_RETURN @@ -233,54 +231,53 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY19]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) @@ -295,54 +292,53 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i1_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: 
[[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY 
[[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY19]], 1 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) @@ -357,54 +353,53 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void - ; GCN-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit 
$sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) @@ -420,11 +415,11 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) @@ -437,54 +432,53 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY 
$sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY 
[[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY19]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -499,54 +493,53 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i8_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY19]], 8 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) @@ -561,54 +554,53 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { define amdgpu_kernel void 
@test_call_external_i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) 
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -620,54 +612,53 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; 
GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY 
$private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY19]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) @@ -682,54 +673,53 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i16_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = 
G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: 
[[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY19]], 16 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) @@ -744,55 +734,54 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: 
[[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef @@ -805,12 +794,12 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x 
s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef @@ -820,55 +809,54 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { define amdgpu_kernel void @test_call_external_i48_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) 
+ ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) @@ -881,55 +869,54 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 { define amdgpu_kernel void 
@test_call_external_i48_zeroext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_zeroext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) @@ -944,55 +931,54 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN-LABEL: name: test_call_external_i48_signext_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = 
COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY 
[[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) @@ -1007,55 +993,54 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN-LABEL: name: test_call_external_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, 
csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1067,55 +1052,54 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN-LABEL: name: test_call_external_p1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: 
[[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 
1) ; GCN-NEXT: S_ENDPGM 0 @@ -1127,58 +1111,57 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2p1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) @@ -1191,55 +1174,54 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN-LABEL: name: test_call_external_p3_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, 
$sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; 
GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 addrspace(3)* @external_p3_func_void() store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef @@ -1249,55 +1231,54 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2p3_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; 
GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY19]](p3), [[PRED_COPY20]](p3) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 @@ -1309,54 +1290,53 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY 
[[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit 
$sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1368,55 +1348,54 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() store volatile float %val, float addrspace(1)* undef @@ -1426,55 +1405,54 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN-LABEL: name: test_call_external_f64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, 
$vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1486,58 +1464,57 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2f64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x 
s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) @@ -1550,55 +1527,54 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = 
PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; 
GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1610,56 +1586,55 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x 
s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1671,57 +1646,56 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY 
[[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 
0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1733,58 +1707,57 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v5i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY 
[[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1796,61 +1769,60 @@ define amdgpu_kernel 
void @test_call_external_v5i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v8i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), 
[[PRED_COPY26]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1862,69 +1834,68 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v16i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: 
[[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[PRED_COPY31]](s32), [[PRED_COPY32]](s32), [[PRED_COPY33]](s32), [[PRED_COPY34]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -1936,85 +1907,84 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v32i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = 
PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; 
GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), 
[[COPY52]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[PRED_COPY31]](s32), [[PRED_COPY32]](s32), [[PRED_COPY33]](s32), [[PRED_COPY34]](s32), [[PRED_COPY35]](s32), [[PRED_COPY36]](s32), [[PRED_COPY37]](s32), [[PRED_COPY38]](s32), [[PRED_COPY39]](s32), [[PRED_COPY40]](s32), [[PRED_COPY41]](s32), [[PRED_COPY42]](s32), [[PRED_COPY43]](s32), [[PRED_COPY44]](s32), [[PRED_COPY45]](s32), [[PRED_COPY46]](s32), [[PRED_COPY47]](s32), [[PRED_COPY48]](s32), [[PRED_COPY49]](s32), [[PRED_COPY50]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2026,55 +1996,54 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, 
$sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], 
[[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef @@ -2084,55 +2053,54 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = 
PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY 
[[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2146,55 +2114,54 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4i16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; 
GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2206,55 +2173,54 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v2f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() store volatile <2 x half> %val, <2 x half> addrspace(1)* undef @@ -2264,55 +2230,54 @@ define amdgpu_kernel void 
@test_call_external_v2f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) 
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2326,55 +2291,54 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN-LABEL: name: test_call_external_v4f16_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = 
COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2386,56 +2350,55 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v3f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 
= PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2447,58 +2410,57 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v5f32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; 
GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, 
implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -2511,60 +2473,59 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = 
G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY21]](s32), [[PRED_COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[PRED_COPY9]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } @external_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 @@ -2578,19 +2539,19 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY3]](s32), [[PRED_COPY4]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[PRED_COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 @@ -2603,57 +2564,56 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN-LABEL: name: 
test_call_external_a2i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[PRED_COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 @@ -2666,66 +2626,65 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN-LABEL: name: test_call_external_a5i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; 
GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = 
COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY21]](s32) ; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY22]](s32) ; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY23]](s32) ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) @@ -2751,54 +2710,53 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v32i32_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: 
liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], 
[[SHL]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) @@ -2806,7 +2764,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[PRED_COPY9]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void() %val0 = extractvalue { <32 x i32>, i32 } %val, 0 @@ -2819,54 +2777,53 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_i32_v32i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY 
$vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) 
= G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) @@ -2874,7 +2831,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[PRED_COPY9]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void() %val0 = extractvalue { i32, <32 x i32> } %val, 0 @@ -2887,53 +2844,52 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY 
$private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) @@ -2947,18 +2903,17 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) @@ -2968,41 +2923,41 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: $vgpr1 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; 
GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[LOAD1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index b32e8211f05e2..0202ec0ba549d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -6,22 +6,21 @@ declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 ; GCN: bb.1 (%ir-block.1): - ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; 
GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -31,49 +30,49 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C3]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C3]](s64) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C5]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY 
[[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[FRAME_INDEX1]](p5) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[PRED_COPY9]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 0948411064af5..2ca9c7b6de78f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -97,50 +97,49 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, define amdgpu_kernel void @test_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY 
[[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -153,8 +152,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -167,37 +166,37 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 
= PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -208,52 +207,51 @@ define void @test_func_call_external_void_func_void() #0 { define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { ; CHECK-LABEL: name: test_call_external_void_func_empty_struct ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: 
[[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -264,52 +262,51 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 { define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { ; CHECK-LABEL: name: test_call_external_void_func_empty_array ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY 
$sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = 
PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -320,53 +317,52 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 { define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -377,55 +373,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { define amdgpu_kernel void 
@test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: 
[[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -437,55 +432,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 
= COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -497,55 +491,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY 
$sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: 
$sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -556,56 +549,55 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8) ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, 
implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -617,56 +609,55 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) 
= PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -678,53 +669,52 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; 
CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -735,55 +725,54 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY 
$sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SEXT]](s32) + ; CHECK-NEXT: 
[[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -795,55 +784,54 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = 
G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -855,53 +843,52 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -914,13 +901,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -933,13 +920,13 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK: bb.1 (%ir-block.1): ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -950,54 +937,53 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_i64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY 
[[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1008,57 +994,56 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; 
CHECK-NEXT: S_ENDPGM 0 @@ -1070,58 +1055,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = 
PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1132,57 +1116,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, 
$sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], 
[[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1194,57 +1177,56 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48_signext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = 
COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) 
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1256,57 +1238,56 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; 
CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1318,55 +1299,54 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK-LABEL: name: test_call_external_void_func_p0_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + 
; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY 
[[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1377,57 +1357,56 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2p0 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = 
G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1439,18 +1418,17 @@ define amdgpu_kernel 
void @test_call_external_void_func_v2p0() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -1459,43 +1437,43 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: 
[[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1509,18 +1487,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { define amdgpu_kernel void 
@test_call_external_void_func_v4i64() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 @@ -1529,45 +1506,45 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; 
CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1580,53 +1557,52 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; 
CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1637,52 +1613,51 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY 
$sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY 
[[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1693,56 +1668,55 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32 - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = 
PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1753,58 +1727,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1815,18 +1788,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 @@ -1835,42 +1807,42 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C5]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; 
CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1881,54 +1853,53 
@@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) 
= G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -1939,58 +1910,57 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C2]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2001,61 +1971,60 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x 
s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2066,53 +2035,52 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: 
[[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = 
PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2124,58 +2092,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - 
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY 
[[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2187,58 +2154,57 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY 
[[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2250,55 +2216,54 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2310,18 +2275,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK-LABEL: name: 
test_call_external_void_func_v4i16_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 @@ -2329,39 +2293,39 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: 
[[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2372,59 +2336,58 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY 
$sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], 
[[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2436,60 +2399,59 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v7i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) 
= G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2501,91 +2463,90 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v63i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), 
[[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY 
[[UV93]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV63]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV64]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit 
$vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2597,44 +2558,43 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = 
PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), 
[[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF3]](s16) ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2643,48 +2603,48 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY 
[[UV73]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV94]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV95]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2696,40 +2656,39 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v66i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -2739,48 +2698,48 @@ define amdgpu_kernel void 
@test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](<2 x s16>) + ; 
CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2792,53 +2751,52 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; 
CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY 
[[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2850,55 +2808,54 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = 
PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = 
PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2910,56 +2867,55 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C2]](s64) + 
; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C4]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -2970,18 +2926,17 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, 
$sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -2989,40 +2944,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3033,18 +2988,17 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = 
COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -3053,41 +3007,41 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3098,57 +3052,56 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY 
$vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY 
$private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3160,18 +3113,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3179,41 +3131,41 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C4]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C5]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C6]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY 
[[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3224,18 +3176,17 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3244,42 +3195,42 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C5]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C6]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C7]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3290,62 +3241,61 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 
= PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3358,18 +3308,17 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 @@ -3381,45 +3330,45 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), 
[[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C8]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C9]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C10]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: 
$sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3430,70 +3379,69 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD 
[[DEF]](p4) :: (invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY 
[[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3506,89 +3454,88 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; 
CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY 
[[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3601,18 +3548,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32 ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -3621,24 +3567,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], 
[[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3648,48 +3594,48 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY 
[[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF2]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF3]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3703,45 +3649,44 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { define amdgpu_kernel void 
@test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF1]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PRED_COPY9]](p1) :: ("amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3752,55 +3697,55 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s16) = PRED_COPY [[ANYEXT]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY21]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[PRED_COPY19]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF2]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF3]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3816,45 +3761,44 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[DEF1]](p1) ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[PRED_COPY9]](p1) :: ("amdgpu-noclobber" load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY11]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY16]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), 
[[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3867,48 +3811,48 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY 
[[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF2]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF3]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3923,18 +3867,17 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = 
COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3943,40 +3886,40 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], 
[[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -3999,10 +3942,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4025,10 +3968,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - 
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4041,18 +3984,17 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val @@ -4062,41 +4004,41 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; 
CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), 
@external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4116,50 +4058,50 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p5) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[PRED_COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4176,43 +4118,43 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY $vgpr0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; 
CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4223,60 +4165,59 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v2i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 - ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT2]](s32) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x 
s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4289,63 +4230,62 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT3]](s32) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; 
CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4358,41 +4298,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v4i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; 
CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4400,24 +4339,24 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT4]](s32) ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT5]](s32) ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT6]](s32) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4430,41 +4369,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v8i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4476,32 +4414,32 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT8]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT8]](s32) ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT9]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT9]](s32) ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT10]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT10]](s32) ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT11]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT11]](s32) ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT12]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT12]](s32) ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT13]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT13]](s32) ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT 
[[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT14]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT14]](s32) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT15]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT15]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4514,41 +4452,40 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-LABEL: name: test_call_external_void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -4568,48 +4505,48 @@ define amdgpu_kernel void 
@test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT16]](s32) ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[ANYEXT17]](s32) ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[ANYEXT18]](s32) ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[ANYEXT19]](s32) ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[ANYEXT20]](s32) ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[ANYEXT21]](s32) ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[ANYEXT22]](s32) ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[ANYEXT23]](s32) ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[ANYEXT24]](s32) ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[ANYEXT25]](s32) ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[ANYEXT26]](s32) ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[ANYEXT27]](s32) ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[ANYEXT28]](s32) ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[ANYEXT29]](s32) ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[ANYEXT30]](s32) ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 
x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4622,18 +4559,17 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { ; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 @@ -4641,24 +4577,24 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) 
:: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -4672,48 +4608,48 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = 
PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -4727,15 +4663,15 @@ define void @stack_12xv3i32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4766,15 +4702,15 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), 
[[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4803,48 +4739,48 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, 
implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -4870,15 +4806,15 @@ define void @stack_12xv3f32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4909,15 +4845,15 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4946,48 +4882,48 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 
= PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5013,15 +4949,15 @@ define void @stack_8xv5i32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), 
[[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -5048,15 +4984,15 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5093,48 +5029,48 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY 
[[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit 
$vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5156,15 +5092,15 @@ define void @stack_8xv5f32() #0 { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5191,15 +5127,15 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5236,48 +5172,48 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = 
PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = PRED_COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = PRED_COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = PRED_COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = PRED_COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = PRED_COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = PRED_COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = PRED_COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = PRED_COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = PRED_COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = PRED_COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = PRED_COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = PRED_COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = PRED_COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = PRED_COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = PRED_COPY [[UV30]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN @@ -5300,27 +5236,27 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY [[DEF]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[C1]](p4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p4) = PRED_COPY [[C1]](p4) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[DEF2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[DEF]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY1]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[DEF1]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY6]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY1]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[DEF1]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[DEF2]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY3]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY5]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 138e66ffd6c7a..0eec240b9ff32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -11,11 +11,11 @@ define i32 @test() { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -71,7 +71,7 @@ define i32 @test_fcmp_constexpr() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), 
[[UITOFP]](s32), [[C1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll index e01d9c399e937..019a18ea3bebf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -6,10 +6,10 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -20,10 +20,10 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val @@ -34,10 +34,10 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nofpexcept G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -48,10 +48,10 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 
(%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -62,10 +62,10 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val @@ -76,16 +76,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val @@ -96,16 +96,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val @@ -116,16 +116,16 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val @@ -136,10 
+136,10 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FSUB:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FSUB [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FSUB]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -150,10 +150,10 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FMUL:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMUL [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FMUL]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -164,10 +164,10 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FDIV:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FDIV [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FDIV]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -178,10 +178,10 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[STRICT_FREM:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FREM [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FREM]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata 
!"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -192,11 +192,11 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[STRICT_FMA:%[0-9]+]]:_(s32) = nsz nofpexcept G_STRICT_FMA [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FMA]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val @@ -207,9 +207,9 @@ define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[PRED_COPY]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[STRICT_FSQRT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index af0dd868c9092..e0ded4a97ca59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -9,37 +9,37 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED: bb.1 (%ir-block.0): ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; FIXED-NEXT: {{ $}} - ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; FIXED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
$vgpr15 - ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; FIXED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; FIXED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; FIXED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; FIXED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; FIXED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; FIXED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; FIXED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; FIXED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; FIXED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; FIXED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; FIXED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; FIXED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; FIXED-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; FIXED-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; FIXED-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; FIXED-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; FIXED-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; FIXED-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; FIXED-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; FIXED-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; FIXED-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; FIXED-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; FIXED-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; FIXED-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; FIXED-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; FIXED-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; FIXED-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; FIXED-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; FIXED-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; FIXED-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; FIXED-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 8adc0ac7009b6..5a8b0cd0c885c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -10,9 +10,9 @@ define void 
@void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void @@ -23,9 +23,9 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void @@ -36,8 +36,8 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -50,8 +50,8 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -70,8 +70,8 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -91,8 +91,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -126,8 +126,8 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) @@ -141,8 +141,8 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -161,8 +161,8 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -181,8 +181,8 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -195,8 +195,8 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -215,8 +215,8 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 16 ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -235,8 +235,8 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -249,8 +249,8 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) @@ -264,8 +264,8 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[PRED_COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) @@ -279,9 +279,9 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void @@ -293,9 +293,9 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void @@ -307,9 +307,9 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void @@ -320,9 +320,9 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef ret void @@ -333,9 +333,9 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) @@ -349,9 +349,9 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -370,9 +370,9 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -391,9 +391,9 @@ define void @void_func_i64(i64 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: 
liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -406,10 +406,10 @@ define void @void_func_i95(i95 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) @@ -423,10 +423,10 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -445,10 +445,10 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -467,10 +467,10 @@ define void @void_func_i96(i96 %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): 
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -483,9 +483,9 @@ define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -498,9 +498,9 @@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -513,8 +513,8 @@ define void @void_func_f16(half %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -527,9 +527,9 @@ define void @void_func_f32(float %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store float %arg0, float 
addrspace(1)* undef ret void @@ -540,9 +540,9 @@ define void @void_func_f64(double %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -555,9 +555,9 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -570,9 +570,9 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) @@ -586,10 +586,10 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) @@ 
-603,10 +603,10 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -621,12 +621,12 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -641,14 +641,14 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -663,9 +663,9 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY]](p3), [[PRED_COPY1]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -678,10 +678,10 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -694,11 +694,11 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -711,12 +711,12 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -729,15 +729,15 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -750,23 +750,23 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -779,40 +779,40 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), 
[[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -826,42 +826,42 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; 
CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), 
[[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -874,12 +874,12 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) @@ -893,12 +893,12 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) @@ -912,12 +912,12 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) @@ -931,15 +931,15 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) @@ -953,18 +953,18 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) @@ -978,21 +978,21 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = 
G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) @@ -1006,30 +1006,30 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) @@ -1043,55 +1043,55 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; 
CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY16]](s32), [[PRED_COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY18]](s32), [[PRED_COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY26]](s32), [[PRED_COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY28]](s32), [[PRED_COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) @@ -1105,9 +1105,9 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void @@ -1118,9 +1118,9 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1135,9 +1135,9 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1150,10 +1150,10 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1168,11 +1168,11 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1185,15 +1185,15 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1208,42 +1208,42 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 - ; CHECK-NEXT: 
[[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x 
s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>), [[PRED_COPY8]](<2 x s16>), [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>), [[PRED_COPY12]](<2 x s16>), [[PRED_COPY13]](<2 x s16>), [[PRED_COPY14]](<2 x s16>), [[PRED_COPY15]](<2 x s16>), [[PRED_COPY16]](<2 x s16>), [[PRED_COPY17]](<2 x s16>), [[PRED_COPY18]](<2 x s16>), [[PRED_COPY19]](<2 x s16>), [[PRED_COPY20]](<2 x s16>), [[PRED_COPY21]](<2 x s16>), [[PRED_COPY22]](<2 x s16>), [[PRED_COPY23]](<2 x s16>), [[PRED_COPY24]](<2 x s16>), [[PRED_COPY25]](<2 x s16>), [[PRED_COPY26]](<2 x s16>), [[PRED_COPY27]](<2 x s16>), [[PRED_COPY28]](<2 x s16>), [[PRED_COPY29]](<2 x s16>), [[PRED_COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), 
[[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1258,9 +1258,9 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1273,10 +1273,10 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1289,11 +1289,11 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, 
addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1306,15 +1306,15 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1327,23 +1327,23 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1356,12 +1356,12 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) @@ -1375,15 +1375,15 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) @@ -1397,18 +1397,18 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) @@ -1422,30 +1422,30 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), 
[[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) @@ -1459,55 +1459,55 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: 
[[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY14]](s32), [[PRED_COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY16]](s32), [[PRED_COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY18]](s32), [[PRED_COPY19]](s32) + ; CHECK-NEXT: 
[[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY20]](s32), [[PRED_COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY22]](s32), [[PRED_COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY24]](s32), [[PRED_COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY26]](s32), [[PRED_COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY28]](s32), [[PRED_COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) @@ -1521,9 +1521,9 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void @@ -1534,9 +1534,9 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1551,9 +1551,9 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x 
s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1566,11 +1566,11 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1583,15 +1583,15 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<2 x s16>), [[PRED_COPY3]](<2 x s16>), [[PRED_COPY4]](<2 x s16>), [[PRED_COPY5]](<2 x s16>), [[PRED_COPY6]](<2 x s16>), [[PRED_COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -1605,16 +1605,16 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[PRED_COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef @@ -1627,9 +1627,9 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, { i32 } addrspace(1)* undef ret void @@ -1640,15 +1640,15 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; 
CHECK-NEXT: SI_RETURN store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void @@ -1658,11 +1658,11 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -1680,18 +1680,18 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY1]], [[C]](s32) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -1700,7 +1700,7 @@ 
define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 @@ -1714,15 +1714,15 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[PRED_COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 @@ -1735,15 +1735,15 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: 
(dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[PRED_COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load i8, i8 addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 @@ -1757,19 +1757,19 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[PRED_COPY]], [[C2]](s32) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 @@ -1777,7 +1777,7 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(p1) = G_CONSTANT i64 8 ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PRED_COPY2]](p1) :: (store 
(s16) into `i16 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 @@ -1792,49 +1792,49 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX2]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) + ; CHECK-NEXT: G_STORE 
[[LOAD2]](s8), [[PRED_COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* null %arg2.load = load i8, i8 addrspace(5)* %arg2 @@ -1848,49 +1848,49 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: 
[[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PRED_COPY31]](p5) :: (dereferenceable load (s8) from 
%ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[PRED_COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: SI_RETURN store i32 %arg2, i32 addrspace(1)* null %arg1.load = load i8, i8 addrspace(5)* %arg1 @@ -1903,40 +1903,40 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1945,11 +1945,11 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* 
undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PRED_COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[PRED_COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef @@ -1963,40 +1963,40 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32) @@ -2008,15 +2008,15 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[PRED_COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PRED_COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PRED_COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[PRED_COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef @@ -2031,50 +2031,50 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: 
[[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PRED_COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[PRED_COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef @@ -2087,40 +2087,40 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = 
COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), 
[[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 @@ -2132,11 +2132,11 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PRED_COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[PRED_COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef @@ -2149,50 +2149,50 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[PRED_COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[PRED_COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef @@ -2205,40 +2205,40 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: 
{{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; 
CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 @@ -2262,11 +2262,11 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[PRED_COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[PRED_COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> 
addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef @@ -2279,40 +2279,40 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: 
[[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 @@ -2332,11 +2332,11 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: 
(volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PRED_COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef @@ -2349,40 +2349,40 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 @@ -2418,11 +2418,11 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[PRED_COPY31]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[PRED_COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef @@ -2435,40 +2435,40 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) 
= COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), 
[[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 @@ -2536,11 +2536,11 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[PRED_COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[PRED_COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef @@ -2554,23 +2554,23 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p3) = PRED_COPY [[DEF]](p3) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT 
[[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[PRED_COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 @@ -2587,11 +2587,11 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -2602,7 +2602,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[PRED_COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 @@ -2620,38 +2620,38 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - 
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY8]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; CHECK-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -2667,40 +2667,40 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; CHECK-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32), [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[PRED_COPY14]](s32), [[PRED_COPY15]](s32), [[PRED_COPY16]](s32), [[PRED_COPY17]](s32), [[PRED_COPY18]](s32), [[PRED_COPY19]](s32), [[PRED_COPY20]](s32), [[PRED_COPY21]](s32), [[PRED_COPY22]](s32), [[PRED_COPY23]](s32), [[PRED_COPY24]](s32), [[PRED_COPY25]](s32), [[PRED_COPY26]](s32), [[PRED_COPY27]](s32), [[PRED_COPY28]](s32), [[PRED_COPY29]](s32), [[PRED_COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 @@ -2736,9 +2736,9 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), 
[[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[PRED_COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef @@ -2750,26 +2750,26 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p3) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[PRED_COPY5]](s32), [[PRED_COPY6]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, 
addrspace 1) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p1) = PRED_COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[PRED_COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[PRED_COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[PRED_COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: SI_RETURN %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 @@ -2788,16 +2788,16 @@ define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspac ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p3) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p3) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY4]](p3), [[PRED_COPY5]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index 2e660884e7db1..d6c7287b3f885 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -7,30 +7,30 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx ret <2 x i32 addrspace(1)*> %gep @@ -42,20 +42,20 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR 
[[COPY]](p3), [[COPY1]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p3) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PRED_COPY]](p3), [[PRED_COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(<2 x p3>) = PRED_COPY [[PTR_ADD]](<2 x p3>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY4]](<2 x p3>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(3)*> %gep @@ -67,27 +67,27 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = 
G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY6]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(1)*> %gep @@ -99,28 +99,28 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(<2 x s64>) = PRED_COPY [[BUILD_VECTOR1]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[PRED_COPY6]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY 
[[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY7]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx ret <2 x i32 addrspace(1)*> %gep @@ -132,26 +132,26 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY4]](s32), [[PRED_COPY4]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY5]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = 
PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx ret <2 x i32 addrspace(1)*> %gep @@ -163,19 +163,19 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY4]](s32), [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -185,12 +185,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x p1>) = PRED_COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY8]](<2 x p1>) + ; CHECK-NEXT: $vgpr0 = 
PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> ret <2 x i32 addrspace(1)*> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 3e8197462e313..b1b7d53da9b94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -4,51 +4,50 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(p4) = PRED_COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[PRED_COPY10]], [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY15]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[DEF]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[DEF1]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 @@ -61,12 +60,12 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = 
G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 89235e3f05de2..78c73214f97b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -52,10 +52,10 @@ define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1966091 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0 %asmresult = extractvalue { i32, i32 } %1, 0 @@ -68,8 +68,8 @@ define i32 @test_specific_vgpr_output() nounwind { ; CHECK-LABEL: name: test_specific_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind @@ -80,8 +80,8 @@ define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind @@ -92,8 +92,8 @@ define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "s_mov_b32 $0, 7", 
"=s"() nounwind @@ -105,10 +105,10 @@ define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 1966090 /* regdef:VGPR_32 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"() %asmresult = extractvalue { float, float } %1, 0 @@ -122,11 +122,11 @@ define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %0, 3211274 /* regdef:VReg_64 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY %1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY1]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"() %asmresult = extractvalue { float, double } %1, 1 @@ -139,9 +139,9 @@ define float @test_vector_output() nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x s32>) = PRED_COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[PRED_COPY]](<2 x s32>), [[C]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[EVEC]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 @@ -152,8 +152,8 @@ define amdgpu_kernel void @test_input_vgpr_imm() { ; CHECK-LABEL: name: test_input_vgpr_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:VGPR_32 */, [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void 
asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -163,8 +163,8 @@ define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK-LABEL: name: test_input_sgpr_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2097161 /* reguse:SReg_32 */, [[COPY]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2097161 /* reguse:SReg_32 */, [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -186,11 +186,11 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 1966089 /* reguse:VGPR_32 */, [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 1966089 /* reguse:VGPR_32 */, [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind @@ -202,10 +202,10 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1966090 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[PRED_COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* elementtype(i32) %a) ret i32 %1 @@ -216,13 +216,13 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[AND]](s32) + ; 
CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY1]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) @@ -233,14 +233,14 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %4, 2097161 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY %2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %4, 2097161 /* reguse:SReg_32 */, [[PRED_COPY2]], 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY3]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY %4 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind @@ -254,20 +254,20 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 1966090 /* regdef:VGPR_32 */, def %4, 1966090 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE 
[[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def %3, 1966090 /* regdef:VGPR_32 */, def %4, 1966090 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[PRED_COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[PRED_COPY5]](tied-def 5) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY %3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY %4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY %5 + ; CHECK-NEXT: G_STORE [[PRED_COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[PRED_COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 @@ -283,11 +283,11 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2097162 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY %0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1966090 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[PRED_COPY1]](tied-def 3) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY %2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll index 45db580e15458..8848549fb580a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -13,7 +13,7 @@ define i32 @load_const_i32_gv() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p1) :: (dereferenceable invariant load (s32) from @const_gv0, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i32, ptr addrspace(1) @const_gv0, align 4 ret i32 %load @@ -24,13 +24,13 @@ define i32 @load_select_const_i32_gv(i1 %cond) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY]](s32) ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv0 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @const_gv1 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p1) = G_SELECT [[TRUNC]](s1), [[GV]], [[GV1]] ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[SELECT]](p1) :: (dereferenceable invariant load (s32) from %ir.select, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %select = select i1 %cond, ptr addrspace(1) @const_gv0, ptr addrspace(1) @const_gv1 %load = load i32, ptr addrspace(1) %select, align 4 @@ -46,9 +46,9 @@ define { i32, i64 } @load_const_struct_gv() { ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[GV]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (dereferenceable invariant load (s64) from @const_struct_gv + 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[LOAD]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %load = load { i32, i64 }, ptr addrspace(1) @const_struct_gv, align 8 ret { i32, i64 } %load @@ -59,12 +59,12 @@ define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN @@ -77,12 +77,12 @@ define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN @@ -95,12 +95,12 @@ define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index 8e90bd593a108..bea2f051cf5da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -8,13 +8,13 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i64(i8 
addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void @@ -25,12 +25,12 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void @@ -41,13 +41,13 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[PRED_COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void @@ -58,13 +58,13 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from 
%ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) ret void @@ -75,12 +75,12 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) ret void @@ -91,13 +91,13 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: G_MEMCPY [[PRED_COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) ret void @@ -108,13 +108,13 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void @@ -125,12 +125,12 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void @@ -141,13 +141,13 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p3) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[PRED_COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void @@ -158,11 +158,11 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = 
G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -175,11 +175,11 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) @@ -193,11 +193,11 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) @@ -211,12 +211,12 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), 
[[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) ret void @@ -227,11 +227,11 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) ret void @@ -242,12 +242,12 @@ define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: G_MEMSET [[PRED_COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index a1a72c07cb631..1a45e5f50858d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -9,7 +9,7 @@ define i32 @reloc_constant() { ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]] - ; CHECK: $vgpr0 = COPY [[SUM]](s32) + ; CHECK: $vgpr0 = PRED_COPY [[SUM]](s32) ; CHECK: SI_RETURN implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll index dab8b64e75f45..4e2c9f85cf157 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll @@ -9,8 +9,8 @@ define i8 addrspace(7)* @no_auto_constfold_gep() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p7) = G_PTR_ADD 
[[C]], [[C1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTR_ADD]](p7) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i8, i8 addrspace(7)* null, i64 123 ret i8 addrspace(7)* %gep @@ -25,12 +25,12 @@ define <2 x i8 addrspace(7)*> @no_auto_constfold_gep_vector() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 123 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p7>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p7>) = COPY [[PTR_ADD]](<2 x p7>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x p7>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(<2 x p7>) = PRED_COPY [[PTR_ADD]](<2 x p7>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x p7>) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i8, <2 x i8 addrspace(7)*> zeroinitializer, <2 x i64> ret <2 x i8 addrspace(7)*> %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index af0daceac6c61..63d6905b5d13e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -6,16 +6,16 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* 
@llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) ret i8* %masked @@ -26,14 +26,14 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) ret i8* %masked @@ -44,15 +44,15 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) ret i8* %masked @@ -63,15 +63,15 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY2]](s32) ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK 
[[MV]], [[TRUNC]](s1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) ret i8* %masked @@ -82,12 +82,12 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[MV]](s64) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) ret i8 addrspace(3)* %masked @@ -98,10 +98,10 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) ret i8 addrspace(3)* %masked @@ -112,11 +112,11 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[TRUNC]](s16) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) ret i8 addrspace(3)* %masked @@ -127,11 +127,11 @@ define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: 
$vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[PRED_COPY]], [[TRUNC]](s1) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) ret i8 addrspace(3)* %masked diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll index 4faea44d1582e..6b503d101895f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -6,13 +6,13 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -24,10 +24,10 @@ define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UADDSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -39,16 +39,16 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -60,16 +60,16 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -81,13 +81,13 @@ define i16 @saddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -99,10 +99,10 @@ define i32 @saddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: 
bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SADDSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -114,16 +114,16 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -135,16 +135,16 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; 
CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -156,13 +156,13 @@ define i16 @usubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -174,10 +174,10 @@ define i32 @usubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[USUBSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -189,16 +189,16 @@ define i64 @usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -210,16 +210,16 @@ define <2 x i32> 
@usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -231,13 +231,13 @@ define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -249,10 +249,10 @@ define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[PRED_COPY]], [[PRED_COPY1]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SSUBSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -264,16 +264,16 @@ define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -285,16 +285,16 @@ define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -306,13 +306,13 @@ define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -324,10 +324,10 @@ define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[USHLSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -339,16 +339,16 @@ define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -360,16 +360,16 @@ define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res @@ -381,13 +381,13 @@ define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res @@ -399,10 +399,10 @@ define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[PRED_COPY]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[SSHLSAT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res @@ -414,16 +414,16 @@ define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY2]](s32), 
[[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res @@ -435,16 +435,16 @@ define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 20208f1336625..2e5670fb82655 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -7,10 +7,10 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %add0 = add i32 %arg0, %arg1 ret i32 %add0 @@ -21,15 +21,15 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 @@ -43,14 +43,14 @@ define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -62,19 +62,19 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x 
s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -89,19 +89,19 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -116,14 +116,14 @@ define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY $sgpr4_sgpr5 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -147,12 +147,12 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](s32) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[EVEC]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[EVEC1]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 entry: @@ -165,12 +165,12 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[LOAD]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 %add0 = add i32 %arg0, %arg1.load @@ -183,24 +183,24 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load 
(s32) from %ir.b.byval, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[PRED_COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) @@ -215,37 +215,37 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -256,9 +256,9 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) @@ -270,47 +270,47 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, 
$vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: 
[[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PRED_COPY]], [[PRED_COPY1]] ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] - ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ADD2]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %val_firststack = extractvalue [32 x i32] %large, 30 %val_laststack = extractvalue [32 x i32] %large, 31 @@ -325,37 +325,37 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -369,39 +369,39 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY 
[[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) @@ -413,37 +413,37 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, 
$vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: 
[[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -462,39 +462,39 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY 
[[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -512,8 +512,8 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 @@ -527,43 +527,43 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C]](s32) - ; GCN-NEXT: $vgpr30 = 
COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C]](s32) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) @@ -576,24 +576,24 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY 
[[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY5]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -608,37 +608,37 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; 
GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 @@ -657,39 +657,39 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[COPY3]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[COPY4]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[COPY5]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[COPY6]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[COPY7]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[COPY8]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[COPY9]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[COPY10]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[COPY11]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[COPY12]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[COPY13]](s32) 
- ; GCN-NEXT: $vgpr14 = COPY [[COPY14]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[COPY21]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[COPY22]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[COPY23]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[COPY24]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[COPY25]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[COPY26]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[COPY27]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY3]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY4]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY5]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY6]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY7]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY8]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY10]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY11]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY12]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY13]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[PRED_COPY21]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[PRED_COPY22]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[PRED_COPY23]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[PRED_COPY24]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[PRED_COPY25]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[PRED_COPY26]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[PRED_COPY27]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[PRED_COPY28]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[PRED_COPY29]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[PRED_COPY30]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -704,37 +704,37 @@ define fastcc i32 
@sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; 
GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 @@ -762,39 +762,39 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr9 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr20 = 
PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -811,46 +811,46 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY 
$vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 @@ -934,33 +934,33 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x 
i32]) #1 { ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY43]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY44]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY45]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY46]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY47]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY48]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), 
@void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -979,46 +979,46 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY 
$sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr9 + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr10 + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr11 + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY $vgpr12 + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY $vgpr13 + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY $vgpr14 + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY $vgpr15 + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY $vgpr16 + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY $vgpr17 + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY $vgpr18 + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY $vgpr19 + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY $vgpr20 + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY $vgpr21 + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY $vgpr22 + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY $vgpr23 + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY $vgpr24 + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY $vgpr25 + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY $vgpr26 + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY $vgpr27 + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY $vgpr28 + ; GCN-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY $vgpr29 + ; GCN-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 @@ -1098,64 +1098,64 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY40:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + 
; GCN-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr4 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr5 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr6 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr7 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr8 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr9 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr10 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr11 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr12 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr13 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr14 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr15 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr16 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr17 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr18 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr19 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr20 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr21 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr22 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr23 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr24 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr25 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr26 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr27 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) - ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: G_STORE [[PRED_COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr4 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr5 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr6 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr7 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr8 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr9 = 
PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr10 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr11 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr12 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr13 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr14 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr15 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr16 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr17 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr18 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr19 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr20 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr21 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr22 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr23 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr24 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr25 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr26 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr27 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr28 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr29 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: $vgpr30 = PRED_COPY [[C1]](s32) + ; GCN-NEXT: [[PRED_COPY49:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY43]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY44]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY45]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY46]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY47]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY48]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1171,42 +1171,42 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), 
[[COPY10]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY 
[[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1220,42 +1220,42 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspac ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) @@ -1269,40 +1269,40 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1316,40 +1316,40 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: 
[[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY13]](s64) + ; GCN-NEXT: $sgpr12 = 
PRED_COPY [[PRED_COPY14]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1363,47 +1363,47 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: 
[[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV7]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV8]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1417,42 +1417,42 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = 
COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY9]](<2 x s16>), [[PRED_COPY10]](<2 x s16>) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY 
[[PRED_COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY14]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY15]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY16]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1466,48 +1466,48 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY11]](s32), [[PRED_COPY12]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY22]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY13]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY14]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY15]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY16]](s64) + ; GCN-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY17]](s32) + ; GCN-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY18]](s32) + ; GCN-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY19]](s32) + ; GCN-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY20]](s32) + ; GCN-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY21]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index 403375d6389f8..8184b89ed6a82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -8,14 +8,14 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), -1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[PRED_COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index b9cdcff95ed1e..4193fe2ee6faa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -8,36 +8,36 @@ define void @tail_call_void_func_void() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr31 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32(s32) = PRED_COPY $sgpr15 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr14 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr13 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr12 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_64 = PRED_COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
[[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY7]](p4) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p4) = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s64) = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(<4 x s32>) = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY [[PRED_COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = PRED_COPY [[PRED_COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = PRED_COPY [[PRED_COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = PRED_COPY [[PRED_COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = PRED_COPY [[PRED_COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[PRED_COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = PRED_COPY [[PRED_COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 tail call void @external_void_func_void() ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll index 5d63ad0c1f17f..88663fc641624 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-zext-vec-index.ll @@ -8,7 +8,7 @@ define i8 @f_i1_1() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i1 true ret i8 %E1 @@ -21,7 +21,7 @@ define i8 @f_i8_255() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = 
PRED_COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %E1 = extractelement <256 x i8> undef, i8 255 ret i8 %E1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll index 72c0adcd91ccf..1ba4e0e4ae327 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -12,9 +12,11 @@ define void @func_use_lds_global() { ; GFX8-LABEL: func_use_lds_global: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_trap 2 ; GFX8-NEXT: ds_write_b32 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -37,7 +39,9 @@ define void @func_use_lds_global_constexpr_cast() { ; GFX8-LABEL: func_use_lds_global_constexpr_cast: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-NEXT: s_mov_b64 s[4:5], 0xc8 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_trap 2 ; GFX8-NEXT: flat_store_dword v[0:1], v0 ; GFX8-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll index ecfacca5d6972..8455f82ae14e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -12,24 +12,24 @@ define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 % ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.1 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 - ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GCN: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr0_sgpr1 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 40 ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @lds ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 - ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 36, 0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]], 36, 0 + ; GFX8: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_U32_]] ; GCN: $m0 = S_MOV_B32 -1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX8: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 0, 0, implicit $m0, implicit $exec - ; GFX9: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 40, 0, implicit $m0, implicit $exec + ; GFX9: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; GFX8: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec + ; GFX9: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[PRED_COPY1]], 40, 0, implicit $m0, implicit $exec ; GFX8: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 ; GFX8: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], 
%subreg.sub1 ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_LOAD_DWORDX2_IMM]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX8: BUFFER_STORE_DWORD_OFFSET [[DS_READ_B32_]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec - ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX9: FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX9: FLAT_STORE_DWORD [[PRED_COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 %ld = load i32, i32 addrspace(3)* %gep diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index cb4989c3ee2d0..109d12ffa77b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -209,19 +209,16 @@ body: | liveins: $vgpr0 ; VI-LABEL: name: test_addrspacecast_p5_to_p0 - ; VI: liveins: $vgpr0, $sgpr4_sgpr5 + ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 228 + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0 @@ -304,19 +301,16 @@ body: | liveins: $vgpr0 ; VI-LABEL: name: test_addrspacecast_p3_to_p0 - ; VI: liveins: $vgpr0, $sgpr4_sgpr5 + ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) - ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 232 + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3) ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI-NEXT: 
[[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0) ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0 @@ -539,24 +533,20 @@ body: | liveins: $vgpr0_vgpr1 ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0 - ; VI: liveins: $vgpr0_vgpr1, $sgpr4_sgpr5 + ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) + ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 232 + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p4) = PRED_COPY [[C]](p4) + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PRED_COPY]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](s64) - ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) + ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; VI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]] @@ -764,18 +754,13 @@ stack: body: | bb.0: ; VI-LABEL: name: test_addrspacecast_p5_fi_to_p0 - ; VI: liveins: $sgpr4_sgpr5 - ; VI-NEXT: {{ $}} - ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; VI-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; VI: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 228 + ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY 
[[MV]](p0) - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY [[MV]](p0) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](p0) ; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0 ; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735 @@ -783,8 +768,8 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C]](s32) ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32) - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(p0) = PRED_COPY [[MV]](p0) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](p0) ; SI-LABEL: name: test_addrspacecast_p5_fi_to_p0 ; SI: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; SI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[FRAME_INDEX]](p5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir index 915139b590fd4..a29ec1e5f0573 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir @@ -45,9 +45,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_x_unpacked ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 8 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 8 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) S_ENDPGM 0, implicit %0 @@ -66,9 +66,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_y_unpacked ; GCN: liveins: $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 3 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 3 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y) S_ENDPGM 0, implicit %0 @@ -87,9 +87,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_z_unpacked ; GCN: liveins: $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PRED_COPY]](s32) + ; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[PRED_COPY1]], 2 ; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z) S_ENDPGM 0, implicit %0 @@ -107,9 +107,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_x_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY 
$vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) %0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x) S_ENDPGM 0, implicit %0 @@ -127,9 +127,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_y_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) @@ -149,9 +149,9 @@ body: | ; GCN-LABEL: name: test_workitem_id_z_packed ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr0 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index b83231680aa56..75b768f1ec80c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -456,8 +456,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -538,8 +538,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -558,10 +558,10 @@ body: | ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -569,10 +569,10 @@ body: | ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir index 023617350e52f..3e4aa3e6de694 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -638,13 +638,13 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[COPY1]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[PRED_COPY]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 7597f34a326d3..153328fd6fde1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -565,17 +565,17 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -601,17 +601,17 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -662,29 +662,29 @@ body: | ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; 
CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -714,29 +714,29 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; 
CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY4]], [[TRUNC4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY5]], [[TRUNC5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY6]], [[TRUNC6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY7]], [[TRUNC7]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -865,29 +865,29 @@ body: | ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) 
= PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -935,10 +935,10 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) @@ -1044,53 +1044,53 @@ body: | ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; 
CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[PRED_COPY12]] ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[PRED_COPY13]] ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[PRED_COPY14]] ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[PRED_COPY15]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1223,14 +1223,14 @@ body: | ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC]](s4) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC1]](s4) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC2]](s4) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC3]](s4) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC4]](s4) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC5]](s4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC6]](s4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s4) = PRED_COPY [[TRUNC7]](s4) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) @@ -1334,41 +1334,41 @@ body: | ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY 
[[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1463,23 +1463,23 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY3]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY4]], [[TRUNC4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY5]], [[TRUNC5]] ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -1529,14 +1529,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[PRED_COPY2]], [[TRUNC2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) @@ -1705,14 +1705,14 @@ body: | ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) @@ -1780,53 +1780,53 @@ body: | ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC]](s16) + ; 
CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[PRED_COPY1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[PRED_COPY2]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[PRED_COPY3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[PRED_COPY4]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[PRED_COPY5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[PRED_COPY6]] ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[PRED_COPY7]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[PRED_COPY8]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[PRED_COPY9]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], 
[[PRED_COPY10]] ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[PRED_COPY11]] ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[PRED_COPY12]] ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[PRED_COPY13]] ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[PRED_COPY14]] ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s16) = PRED_COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[PRED_COPY15]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) @@ -1881,10 +1881,10 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) @@ -1899,14 +1899,14 @@ body: | ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) @@ -1921,18 +1921,18 @@ body: | ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) @@ -2050,8 +2050,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) ; CHECK-NEXT: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -2060,10 +2060,10 @@ body: | ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) @@ -2072,12 +2072,12 @@ body: | ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) @@ -2086,14 +2086,14 @@ body: | ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) ; CHECK-NEXT: 
[[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) @@ -2102,16 +2102,16 @@ body: | ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) @@ -2120,18 +2120,18 @@ body: | ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; 
CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC21]](s8) ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) @@ -2140,20 +2140,20 @@ body: | ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC24]](s8) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC25]](s8) ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) @@ -2640,30 +2640,30 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC13]](s8) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir index 2b855e33e96d4..315806fe5541a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -14,11 +14,11 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX8-LABEL: name: bswap_s8 @@ -51,10 +51,10 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -86,11 +86,11 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX8-LABEL: name: bswap_s24 @@ -147,18 +147,18 @@ body: | ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -193,26 +193,26 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], 
[[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY4]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) + ; GFX7-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY6]](s32) + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[PRED_COPY3]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -284,34 +284,34 @@ body: | ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C1]](s32) ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[PRED_COPY1]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] - ; GFX7-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[PRED_COPY3]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[PRED_COPY5]](s32) ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY6]](s32) ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index 1eacc83b709ab..e716a9cf69bb2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -88,8 +88,8 @@ body: | ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -155,13 +155,13 @@ body: | ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[SUB1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; 
CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -187,8 +187,8 @@ body: | ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index 13339d22269f2..70c0b0424d0ca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -98,8 +98,8 @@ body: | ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -173,14 +173,14 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32) ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[SUB1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -208,8 +208,8 @@ body: | ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[SUB]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir index 2753b7a503efd..bf03e70955fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -13,8 +13,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 @@ -35,8 +35,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s9) = G_TRUNC %0 @@ -130,8 +130,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -196,12 +196,12 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -225,8 +225,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTPOP]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 
= COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 @@ -285,67 +285,67 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[C4]](s16) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[COPY2]], [[C5]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[COPY1]], [[SHL]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY1]], [[C5]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY3]], [[C6]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY2]], [[C6]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[COPY4]], [[C7]](s16) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY3]], [[C7]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[C8]](s16) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY4]], [[C8]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[COPY6]], [[C9]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY5]], [[C9]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[COPY7]], [[C10]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY6]], [[C10]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[COPY8]], [[C11]](s16) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY7]], [[C11]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[COPY9]], [[C12]](s16) + ; CHECK-NEXT: 
[[SHL7:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY8]], [[C12]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[COPY10]], [[C13]](s16) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY9]], [[C13]](s16) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[COPY11]], [[C14]](s16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY10]], [[C14]](s16) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 11 - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[COPY12]], [[C15]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY11]], [[C15]](s16) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[COPY13]], [[C16]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY12]], [[C16]](s16) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 13 - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[COPY14]], [[C17]](s16) + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY13]], [[C17]](s16) ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[C4]](s16) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s16) = PRED_COPY [[C4]](s16) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[COPY15]], [[C18]](s16) + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[PRED_COPY14]], [[C18]](s16) ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[OR12]], [[SHL13]] ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[C4]], [[C19]](s16) ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[OR13]], [[SHL14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[MV2]](s64) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s64) = PRED_COPY [[MV2]](s64) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C20]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C20]](s32) @@ -360,17 +360,17 @@ body: | ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL16]] ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: 
[[SHL17:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C20]](s32) ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL17]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C20]](s32) - ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL18]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY18]], [[C20]](s32) + ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY17]], [[SHL18]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR17]](s32), [[OR18]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV5]](s128), 0 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[COPY16]], [[EXTRACT]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[PRED_COPY15]], [[EXTRACT]] ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](s32) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir index 6caa14dd6a27e..100f132c7318c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -84,9 +84,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -149,12 +149,12 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[LSHR]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) @@ -176,9 +176,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index e29793a03d4f1..8a6e38c5cdaf4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -94,9 +94,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 @@ -167,13 +167,13 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[C1]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) @@ -197,9 +197,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]] ; CHECK-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index fd33f3cea56ed..5d7b123b8a61a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -12,8 +12,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -30,8 +30,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -48,8 +48,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -66,8 +66,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -84,8 +84,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -102,8 +102,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<5 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -121,8 +121,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<6 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -140,8 +140,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<7 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -159,8 +159,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<8 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -178,8 +178,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(<16 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0 %2:_(s32) = G_CONSTANT i32 0 @@ -235,8 +235,8 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -272,8 +272,8 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -290,8 +290,8 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -337,8 +337,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -363,8 +363,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -586,8 +586,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 @@ -609,9 +609,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 @@ -633,9 +633,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 3 @@ -657,9 +657,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 4 @@ -681,9 +681,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 5 @@ -705,9 +705,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 7 @@ -821,8 +821,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -843,8 +843,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -865,8 +865,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -965,8 +965,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -985,8 +985,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), 
[[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<8 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1005,8 +1005,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PRED_COPY]](s64) %0:_(<16 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1025,8 +1025,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_CONSTANT i64 0 %2:_(s32) = G_TRUNC %1 @@ -1047,8 +1047,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1071,8 +1071,8 @@ body: | ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1092,8 +1092,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32), implicit [[DEF]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[DEF]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](s32), implicit [[DEF]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1119,8 +1119,8 @@ body: | ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p3) = PRED_COPY [[UV1]](p3) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index 6630300bcc96b..f1a651c99f14f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -214,8 +214,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -231,8 +231,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -248,8 +248,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -265,8 +265,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -282,8 +282,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 64 $vgpr0 = COPY %1 @@ -299,8 +299,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir index 6f086c613eba8..0a6d9d3b09594 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -698,8 +698,8 @@ body: | ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -721,8 +721,8 @@ body: | ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: 
[[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -744,8 +744,8 @@ body: | ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ZEXT1]], [[C2]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] @@ -778,8 +778,8 @@ body: | ; SI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) @@ -799,8 +799,8 @@ body: | ; VI-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) @@ -820,8 +820,8 @@ body: | ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR1]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index 7618cab6b7202..cc9c770378410 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -260,9 +260,9 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: 
[[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -434,9 +434,9 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) @@ -459,11 +459,11 @@ body: | ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] @@ -490,11 +490,11 @@ body: | ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] @@ -531,33 +531,33 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = 
G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -573,33 +573,33 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; VI-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -615,33 +615,33 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), 
[[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -709,9 +709,9 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -723,9 +723,9 @@ body: | ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY1]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -965,9 +965,9 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -979,9 +979,9 @@ body: | ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY1]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -993,9 +993,9 @@ body: | ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY2]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index b82b212665c11..4d9c48d06b797 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -219,9 +219,9 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -229,14 +229,14 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C1]](s32) + ; SI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -250,9 +250,9 @@ body: | ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -265,9 +265,9 @@ body: | ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) @@ -318,9 +318,9 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -463,8 +463,8 @@ body: | ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] @@ -485,9 +485,9 @@ body: | ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) @@ -515,9 +515,9 @@ body: | ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) @@ -559,30 +559,30 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[COPY]], [[PRED_COPY1]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -600,30 +600,30 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -641,30 +641,30 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -735,22 +735,22 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], 
[[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL5]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -764,9 +764,9 @@ body: | ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND10]](s16) ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[ZEXT5]](s32) @@ -779,9 +779,9 @@ body: | ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[ZEXT7]](s32) @@ -793,20 +793,20 @@ body: | ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[COPY13]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[PRED_COPY7]](s32) ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND18]](s16) ; SI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND20]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL 
[[COPY16]], [[C]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL10]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[PRED_COPY8]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[C]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY9]], [[SHL10]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) @@ -818,9 +818,9 @@ body: | ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[SHL9]], [[C1]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[PRED_COPY11]](s32) ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND22]](s16) ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C1]] ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND24]], [[ZEXT11]](s32) @@ -897,9 +897,9 @@ body: | ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR5]] ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL5]] + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL5]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST6]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -928,10 +928,10 @@ body: | ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND10]](s16) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR12]] ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL10]] + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL10]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST5]], [[BITCAST8]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) @@ -1090,9 +1090,9 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY 
[[C4]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) @@ -1100,14 +1100,14 @@ body: | ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[PRED_COPY1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[PRED_COPY2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -1121,9 +1121,9 @@ body: | ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY5]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) @@ -1136,9 +1136,9 @@ body: | ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY6]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) @@ -1159,9 +1159,9 @@ body: | ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[PRED_COPY7]](s32) ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) @@ -1173,9 +1173,9 @@ body: | ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[PRED_COPY8]](s32) ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) @@ -1183,14 +1183,14 @@ body: | ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[PRED_COPY9]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[PRED_COPY10]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[C1]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY11]], [[SHL12]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) @@ -1204,9 +1204,9 @@ body: | ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[PRED_COPY13]](s32) ; SI-NEXT: 
[[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) @@ -1219,9 +1219,9 @@ body: | ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[PRED_COPY14]](s32) ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) @@ -1276,9 +1276,9 @@ body: | ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -1332,10 +1332,10 @@ body: | ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C1]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL12]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir index a166c2d45abbc..23795cf614a2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -135,9 +135,9 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 
; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -149,9 +149,9 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -163,9 +163,9 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -191,9 +191,9 @@ body: | ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX8-LABEL: name: test_icmp_s24 @@ -202,9 +202,9 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX9-LABEL: name: test_icmp_s24 @@ -213,9 +213,9 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = 
G_ICMP intpred(ne), [[PRED_COPY]](s32), [[AND]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) %0:_(s24) = G_CONSTANT i24 0 @@ -904,25 +904,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: test_icmp_s33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX9-LABEL: name: test_icmp_s33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 1dcb2bf3e42a6..1b4891a8d0735 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -423,256 +423,256 @@ body: | ; CHECK-NEXT: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK-NEXT: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD3]](p5) + ; CHECK-NEXT: G_STORE [[UV1]](s32), [[PRED_COPY]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK-NEXT: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD4]](p5) + ; CHECK-NEXT: G_STORE [[UV2]](s32), [[PRED_COPY1]](p5) :: (store (s32) into 
%stack.0 + 8, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK-NEXT: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD5]](p5) + ; CHECK-NEXT: G_STORE [[UV3]](s32), [[PRED_COPY2]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD6]](p5) + ; CHECK-NEXT: G_STORE [[UV4]](s32), [[PRED_COPY3]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK-NEXT: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD7]](p5) + ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PRED_COPY4]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD8]](p5) + ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PRED_COPY5]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK-NEXT: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD9]](p5) + ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PRED_COPY6]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK-NEXT: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD10]](p5) + ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PRED_COPY7]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK-NEXT: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD11]](p5) + ; CHECK-NEXT: G_STORE [[UV9]](s32), [[PRED_COPY8]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD12]](p5) + ; CHECK-NEXT: G_STORE [[UV10]](s32), [[PRED_COPY9]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK-NEXT: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD13]](p5) + ; CHECK-NEXT: G_STORE [[UV11]](s32), [[PRED_COPY10]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK-NEXT: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD14]](p5) + ; CHECK-NEXT: G_STORE [[UV12]](s32), [[PRED_COPY11]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK-NEXT: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD15]](p5) + ; CHECK-NEXT: G_STORE [[UV13]](s32), [[PRED_COPY12]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK-NEXT: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD16]](p5) + ; CHECK-NEXT: G_STORE [[UV14]](s32), [[PRED_COPY13]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK-NEXT: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD17]](p5) + ; CHECK-NEXT: G_STORE [[UV15]](s32), [[PRED_COPY14]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: 
[[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD18]](p5) + ; CHECK-NEXT: G_STORE [[UV16]](s32), [[PRED_COPY15]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK-NEXT: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD19]](p5) + ; CHECK-NEXT: G_STORE [[UV17]](s32), [[PRED_COPY16]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK-NEXT: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD20]](p5) + ; CHECK-NEXT: G_STORE [[UV18]](s32), [[PRED_COPY17]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK-NEXT: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD21]](p5) + ; CHECK-NEXT: G_STORE [[UV19]](s32), [[PRED_COPY18]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK-NEXT: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD22]](p5) + ; CHECK-NEXT: G_STORE [[UV20]](s32), [[PRED_COPY19]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK-NEXT: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD23]](p5) + ; CHECK-NEXT: G_STORE [[UV21]](s32), [[PRED_COPY20]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK-NEXT: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD24]](p5) + ; CHECK-NEXT: G_STORE [[UV22]](s32), [[PRED_COPY21]](p5) :: (store (s32) into %stack.0 + 88, 
align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK-NEXT: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD25]](p5) + ; CHECK-NEXT: G_STORE [[UV23]](s32), [[PRED_COPY22]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK-NEXT: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD26]](p5) + ; CHECK-NEXT: G_STORE [[UV24]](s32), [[PRED_COPY23]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK-NEXT: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD27]](p5) + ; CHECK-NEXT: G_STORE [[UV25]](s32), [[PRED_COPY24]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK-NEXT: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD28]](p5) + ; CHECK-NEXT: G_STORE [[UV26]](s32), [[PRED_COPY25]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK-NEXT: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD29]](p5) + ; CHECK-NEXT: G_STORE [[UV27]](s32), [[PRED_COPY26]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK-NEXT: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD30]](p5) + ; CHECK-NEXT: G_STORE [[UV28]](s32), [[PRED_COPY27]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK-NEXT: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, 
basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD31]](p5) + ; CHECK-NEXT: G_STORE [[UV29]](s32), [[PRED_COPY28]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK-NEXT: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD32]](p5) + ; CHECK-NEXT: G_STORE [[UV30]](s32), [[PRED_COPY29]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD33]](p5) + ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PRED_COPY30]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD34]](p5) + ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PRED_COPY31]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD35]](p5) + ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PRED_COPY32]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK-NEXT: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD36]](p5) + ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PRED_COPY33]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK-NEXT: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD37]](p5) + ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PRED_COPY34]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK-NEXT: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD38]](p5) + ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PRED_COPY35]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK-NEXT: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD39]](p5) + ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PRED_COPY36]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK-NEXT: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD40]](p5) + ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PRED_COPY37]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK-NEXT: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD41]](p5) + ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PRED_COPY38]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK-NEXT: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD42]](p5) + ; CHECK-NEXT: G_STORE [[UV40]](s32), [[PRED_COPY39]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK-NEXT: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY40:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD43]](p5) + ; CHECK-NEXT: G_STORE [[UV41]](s32), [[PRED_COPY40]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK-NEXT: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD44]](p5) + ; CHECK-NEXT: G_STORE 
[[UV42]](s32), [[PRED_COPY41]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK-NEXT: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD45]](p5) + ; CHECK-NEXT: G_STORE [[UV43]](s32), [[PRED_COPY42]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK-NEXT: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD46]](p5) + ; CHECK-NEXT: G_STORE [[UV44]](s32), [[PRED_COPY43]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK-NEXT: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD47]](p5) + ; CHECK-NEXT: G_STORE [[UV45]](s32), [[PRED_COPY44]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK-NEXT: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD48]](p5) + ; CHECK-NEXT: G_STORE [[UV46]](s32), [[PRED_COPY45]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK-NEXT: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD49]](p5) + ; CHECK-NEXT: G_STORE [[UV47]](s32), [[PRED_COPY46]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK-NEXT: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD50]](p5) + ; CHECK-NEXT: G_STORE [[UV48]](s32), [[PRED_COPY47]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK-NEXT: 
G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD51]](p5) + ; CHECK-NEXT: G_STORE [[UV49]](s32), [[PRED_COPY48]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK-NEXT: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD52]](p5) + ; CHECK-NEXT: G_STORE [[UV50]](s32), [[PRED_COPY49]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK-NEXT: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD53]](p5) + ; CHECK-NEXT: G_STORE [[UV51]](s32), [[PRED_COPY50]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK-NEXT: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD54]](p5) + ; CHECK-NEXT: G_STORE [[UV52]](s32), [[PRED_COPY51]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK-NEXT: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD55]](p5) + ; CHECK-NEXT: G_STORE [[UV53]](s32), [[PRED_COPY52]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK-NEXT: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD56]](p5) + ; CHECK-NEXT: G_STORE [[UV54]](s32), [[PRED_COPY53]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK-NEXT: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD57]](p5) + ; CHECK-NEXT: G_STORE [[UV55]](s32), [[PRED_COPY54]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) ; CHECK-NEXT: 
[[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK-NEXT: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD58]](p5) + ; CHECK-NEXT: G_STORE [[UV56]](s32), [[PRED_COPY55]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK-NEXT: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY56:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD59]](p5) + ; CHECK-NEXT: G_STORE [[UV57]](s32), [[PRED_COPY56]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK-NEXT: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY57:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD60]](p5) + ; CHECK-NEXT: G_STORE [[UV58]](s32), [[PRED_COPY57]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK-NEXT: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY58:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD61]](p5) + ; CHECK-NEXT: G_STORE [[UV59]](s32), [[PRED_COPY58]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK-NEXT: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY59:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD62]](p5) + ; CHECK-NEXT: G_STORE [[UV60]](s32), [[PRED_COPY59]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK-NEXT: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY60:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD63]](p5) + ; CHECK-NEXT: G_STORE [[UV61]](s32), [[PRED_COPY60]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK-NEXT: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; 
CHECK-NEXT: [[PRED_COPY61:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD64]](p5) + ; CHECK-NEXT: G_STORE [[UV62]](s32), [[PRED_COPY61]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) ; CHECK-NEXT: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) - ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK-NEXT: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK-NEXT: [[PRED_COPY62:%[0-9]+]]:_(p5) = PRED_COPY [[PTR_ADD65]](p5) + ; CHECK-NEXT: G_STORE [[UV63]](s32), [[PRED_COPY62]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) ; CHECK-NEXT: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]] ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]] @@ -758,49 +758,49 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) ; CHECK-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) ; CHECK-NEXT: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) + ; CHECK-NEXT: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C68]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) + ; CHECK-NEXT: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C69]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) + ; CHECK-NEXT: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C70]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) + ; CHECK-NEXT: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C71]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY65]], [[C72]](s64) + ; CHECK-NEXT: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C72]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) + ; CHECK-NEXT: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C73]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) + ; CHECK-NEXT: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C74]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) + ; CHECK-NEXT: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C75]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) + ; CHECK-NEXT: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C76]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) - ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) + ; CHECK-NEXT: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) + ; CHECK-NEXT: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C77]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) + ; CHECK-NEXT: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C78]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64) + ; CHECK-NEXT: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C79]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 90c2b0a69b1f8..5a2e79acf9619 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -1694,8 +1694,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1721,8 +1721,8 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -131071 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1748,8 +1748,8 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16776961 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1770,12 +1770,12 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[AND]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll index 1b0cff0f58afd..c2576dbd30876 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -8,39 +8,39 @@ define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - 
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_swap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -53,39 +53,39 @@ define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 
%data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -98,39 +98,39 @@ define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_sub_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -143,39 +143,39 @@ define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) 
= PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -189,39 +189,39 @@ define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umin_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -234,39 +234,39 @@ define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -279,39 +279,39 @@ define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; 
GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umax_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -324,39 +324,39 @@ define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_and_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -369,39 +369,39 @@ define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_or_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -414,39 +414,39 @@ define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_xor_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -459,39 +459,39 @@ define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_inc_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -504,39 +504,39 @@ define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: 
$vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_dec_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -549,43 +549,43 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - 
; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -598,45 +598,45 @@ define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -649,55 +649,55 @@ define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 
+ ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -710,55 +710,55 @@ define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0) @@ -771,45 +771,45 @@ define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; 
GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -822,55 +822,55 @@ define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) @@ -883,55 +883,55 @@ define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -944,57 +944,57 @@ define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, 
$vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile 
dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), 
[[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[PRED_COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) @@ -1007,39 +1007,39 @@ define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG 
implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[PRED_COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) @@ -1052,49 +1052,49 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -1107,59 +1107,59 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -1172,61 +1172,61 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 
+ ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index e86e224bc2b32..87e5eb5a6981b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -8,51 +8,51 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -65,61 +65,61 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -133,75 +133,75 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
$sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -216,75 +216,75 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -299,61 +299,61 @@ define amdgpu_ps <4 x 
float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY 
[[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -367,75 +367,75 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), 
[[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; 
GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -450,75 +450,75 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -533,79 +533,79 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; 
GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: 
[[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -621,61 +621,61 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor 
; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY 
[[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -689,75 +689,75 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; 
GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -772,79 +772,79 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 
+ ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_3d ; 
GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), 
[[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -860,79 +860,79 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + 
; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -948,75 +948,75 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x 
s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 
x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1031,79 +1031,79 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_mip_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1119,51 +1119,51 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) 
= G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1176,61 +1176,61 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1244,75 +1244,75 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; 
GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1327,75 +1327,75 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; 
GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1410,61 +1410,61 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: 
{{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE 
intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1478,75 +1478,75 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: 
[[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1561,75 +1561,75 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, 
[[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1644,79 +1644,79 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), 
[[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), 
[[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1732,61 +1732,61 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x 
s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1800,75 +1800,75 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 
x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1883,79 +1883,79 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + 
; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1971,79 +1971,79 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, 
$vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2059,75 +2059,75 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, 
[[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2142,79 +2142,79 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), 
[[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr5 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY13]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2230,51 +2230,51 @@ define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = 
PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_1d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2287,51 +2287,51 @@ define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2344,51 +2344,51 @@ define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) 
= G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_3d ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2401,51 +2401,51 @@ define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY 
[[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_cube ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2458,51 +2458,51 @@ define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_1darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: 
[[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2515,51 +2515,51 @@ define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2darray ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2572,51 +2572,51 @@ define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: 
$vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2dmsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2629,51 +2629,51 @@ define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, 
<2 x ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_2darraymsaa ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -2686,43 +2686,43 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: load_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2735,47 +2735,47 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; 
GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX10NSA-LABEL: name: load_1d_V2 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2788,43 +2788,43 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ 
$}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[PRED_COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V1 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[PRED_COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2837,47 +2837,47 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; 
GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V2 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2890,51 +2890,51 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_glc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; 
GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2947,51 +2947,51 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, 
$sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3004,51 +3004,51 @@ define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_glc_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3061,51 +3061,51 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY 
$vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3118,51 +3118,51 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; 
GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3175,51 +3175,51 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: 
[[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc_slc ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr4 ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY12]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") + ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3234,10 +3234,10 @@ define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x floa ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: getresinfo_dmask0 ; GFX10NSA: bb.1.main_body: @@ -3245,10 +3245,10 @@ define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x floa ; GFX10NSA-NEXT: {{ $}} ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) 
+ ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 @@ -3261,55 +3261,55 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; 
GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_1d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3325,65 +3325,65 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY 
[[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3400,79 +3400,79 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_3d_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -3490,83 +3490,83 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x 
s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[PRED_COPY8]](<2 x s16>) ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY9]](<2 x s16>) ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index cbf4c9bcefc50..da6e8dc70a46b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -7,41 +7,41 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](s16) - ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[ANYEXT]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -52,19 +52,19 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -73,26 +73,26 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -103,19 +103,19 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -129,26 +129,26 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -158,8 +158,8 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -170,19 +170,19 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -196,29 +196,29 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -229,45 +229,45 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 
1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -281,20 +281,20 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -304,30 +304,30 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -341,20 +341,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -369,27 +369,27 @@ define 
amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) @@ -402,8 +402,8 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -417,20 +417,20 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>) ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -445,33 +445,33 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -485,19 +485,19 @@ define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex @@ -508,19 +508,19 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -528,26 +528,26 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; 
PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -558,19 +558,19 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex @@ -581,19 +581,19 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 
(%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -606,33 +606,33 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; 
PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -643,19 +643,19 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -665,33 +665,33 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; 
PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -702,8 +702,8 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -714,15 +714,15 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -733,8 +733,8 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -745,19 +745,19 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) 
= G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -771,29 +771,29 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -804,19 +804,19 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), 
$noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -829,29 +829,29 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -862,19 +862,19 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; 
UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -884,29 +884,29 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -917,23 +917,23 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[UV1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex @@ -944,45 +944,45 @@ 
define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 @@ -996,20 +996,20 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -1019,30 +1019,30 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x 
s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1056,20 +1056,20 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 
[[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] @@ -1079,30 +1079,30 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 @@ -1116,20 +1116,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; 
UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1143,27 +1143,27 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1172,8 +1172,8 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1187,20 +1187,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1212,27 +1212,27 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1241,8 +1241,8 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1256,20 +1256,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1281,27 +1281,27 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -1310,8 +1310,8 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, 
i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -1325,20 +1325,20 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1353,33 +1353,33 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>), align 8, addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY 
[[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1393,20 +1393,20 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1420,33 +1420,33 @@ define 
amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; 
PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1460,20 +1460,20 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: 
(store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1485,33 +1485,33 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; 
PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 @@ -1525,20 +1525,20 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), addrspace 7) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -1550,33 +1550,33 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16), 
addrspace 7) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index e20e567fae039..e13e58a49d47e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -6,20 +6,20 @@ define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -30,22 +30,22 @@ define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 3, i32 %s, i32 %t, <8 x 
i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -56,23 +56,23 @@ define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -83,24 +83,24 @@ define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) 
= COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -111,23 +111,23 @@ define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY 
$sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -141,24 +141,24 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -172,25 +172,25 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; 
GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -204,26 +204,26 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -237,10 +237,10 @@ define amdgpu_ps float @image_load_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -251,22 +251,22 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -277,12 +277,12 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x 
float> %tex @@ -293,24 +293,24 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -321,23 +321,23 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -348,13 +348,13 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY 
[[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -365,25 +365,25 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) 
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -394,25 +394,25 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret 
<4 x float> %tex @@ -423,24 +423,24 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -451,14 +451,14 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex @@ -469,23 +469,23 @@ define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: 
(dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 @@ -499,25 +499,25 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; 
GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -531,25 +531,25 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 
0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 @@ -563,26 +563,26 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } 
%res, 0 @@ -596,26 +596,26 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -629,26 +629,26 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 
(%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 @@ -662,27 +662,27 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: 
{{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>), align 16, addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -696,27 +696,27 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -730,27 +730,27 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 @@ -764,27 +764,27 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GCN-NEXT: $vgpr1 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr2 = PRED_COPY [[DEF1]](s32) + ; GCN-NEXT: $vgpr3 = PRED_COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll index 
8421dcf991c94..5fe249f345fe5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -8,50 +8,50 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; 
GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + 
; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %v @@ -62,58 +62,58 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 ad ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32) + ; GFX6-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX6-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX6-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX6-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX6-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10NSA-LABEL: name: load_2darraymsaa_tfe ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10NSA-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10NSA-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[PRED_COPY10]](s32), [[PRED_COPY11]](s32), [[PRED_COPY12]](s32), [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10NSA-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10NSA-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll index 8e41a41219776..c25b1e6abd1bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -8,40 +8,40 @@ define amdgpu_ps float @image_load_3d_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") - ; 
GFX6-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), 
[[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32), addrspace 7) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.3d.f32.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) ret float %tex @@ -52,46 +52,46 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32) + ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 ; GFX10NSA: bb.1 (%ir-block.0): ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10NSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10NSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10NSA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10NSA-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10NSA-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10NSA-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10NSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10NSA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10NSA-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10NSA-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10NSA-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10NSA-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10NSA-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10NSA-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10NSA-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10NSA-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") + ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32), addrspace 7) ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10NSA-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) ; GFX10NSA-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %val, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll index c24e9271c1ca0..2f37347f5b787 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll @@ -8,55 +8,55 @@ define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY12]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -68,61 +68,61 @@ define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), 
[[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; 
GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -134,71 +134,71 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 
[[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; 
GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -210,71 +210,71 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) 
- ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cube ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -286,61 +286,61 @@ define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), 
[[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_1darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -352,71 +352,71 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, 
[[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_2darray ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -428,65 +428,65 @@ define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -498,67 +498,67 @@ define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; 
GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -570,61 +570,61 @@ define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: 
$vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -636,71 +636,71 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; 
GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -712,67 +712,67 @@ define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -784,74 +784,74 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -863,67 +863,67 @@ define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 
= COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), 
[[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -935,71 +935,71 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; 
GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1011,70 +1011,70 @@ define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 0, i32 0, i32 0) @@ -1086,74 +1086,74 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: 
[[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1165,71 +1165,71 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; 
GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; 
GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1241,76 +1241,76 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: 
[[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1322,74 +1322,74 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x 
s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), 
[[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1401,80 +1401,80 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = 
COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 
:: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_b_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), 
[[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1486,72 +1486,72 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x 
s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1563,82 +1563,82 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; 
GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, 
half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1650,38 +1650,38 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -1690,49 +1690,49 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: 
name: sample_d_3d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; 
GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -1741,12 +1741,12 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX10-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half 
%dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1758,76 +1758,76 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1839,86 +1839,86 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: 
[[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 
x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1930,76 +1930,76 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = 
PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2011,90 +2011,90 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, 
$noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: 
[[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY 
[[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2106,80 +2106,80 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; 
GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2191,94 +2191,94 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x 
s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_d_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY 
[[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2290,72 +2290,72 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; 
GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2367,82 +2367,82 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, 
$sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; 
GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x 
s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2454,76 +2454,76 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: 
[[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = 
COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), 
[[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2535,86 +2535,86 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; 
GFX10-LABEL: name: sample_c_cd_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: 
[[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2626,76 +2626,76 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; 
GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2707,90 +2707,90 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX9-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x 
s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2802,80 +2802,80 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: 
[[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_cl_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2887,94 +2887,94 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = 
COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = 
G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_cd_cl_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -2986,61 +2986,61 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: 
[[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3052,71 +3052,71 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; 
GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY 
$vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3128,67 +3128,67 @@ define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_l_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; 
GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3200,74 +3200,74 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + 
; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_l_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 
(<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3279,55 +3279,55 @@ define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), 
[[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3339,61 +3339,61 @@ define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: 
[[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_lz_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3405,65 +3405,65 @@ define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; 
GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 
(<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_lz_1d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3475,67 +3475,67 @@ define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) 
= PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: sample_c_lz_2d ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = 
COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3547,91 +3547,91 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: sample_c_d_o_2darray_V1 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -3643,95 +3643,95 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-NEXT: {{ 
$}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX9-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX9-NEXT: 
[[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX9-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX9-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX9-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX9-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX9-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: sample_c_d_o_2darray_V2 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr6 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll index 184069142b9b6..d3cbae0beed9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -7,69 +7,69 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -81,79 +81,79 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), 
[[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -165,99 +165,99 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX10-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_3d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
$sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -269,73 +269,73 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 
x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -347,83 +347,83 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; 
GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -435,73 +435,73 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; 
GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -513,83 +513,83 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY 
[[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; 
GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -601,77 +601,77 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 
- ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x 
s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -683,89 +683,89 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg 
%rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_d_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = 
COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; 
GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -777,69 +777,69 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY 
[[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 
x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -851,79 +851,79 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY 
$vgpr5 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = 
PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -935,73 +935,73 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), 
[[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1013,83 +1013,83 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, 
[[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY 
$sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; 
GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1101,73 +1101,73 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + 
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; 
GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: 
$vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1179,83 +1179,83 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1267,77 +1267,77 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), 
[[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_cl_1d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1349,89 +1349,89 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; 
GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX11-LABEL: name: sample_c_cd_cl_2d ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; 
GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1443,85 +1443,85 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; 
GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; 
GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: sample_c_d_o_2darray_V1 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource") - ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) @@ -1533,89 +1533,89 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX10: bb.1.main_body: ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX10-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX10-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX10-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX10-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX10-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX10-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX10-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; 
GFX10-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 ; GFX11: bb.1.main_body: ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) - ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) - ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX11-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $sgpr10 + ; GFX11-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $sgpr11 + ; GFX11-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $sgpr12 + ; GFX11-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GFX11-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = 
PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY14]](s32) + ; GFX11-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY15]](s32) + ; GFX11-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY16]](s32) + ; GFX11-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY17]](s32) + ; GFX11-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY $vgpr6 + ; GFX11-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY $vgpr7 + ; GFX11-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY $vgpr8 + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY13]](s32) ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) - ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY18]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY19]](s32) + ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[PRED_COPY20]](s32) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 302a4eec3b2da..874c93978670e 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -10,81 +10,81 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; 
GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: 
G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY10]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -95,84 +95,84 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), 
[[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[PRED_COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[PRED_COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -183,49 +183,49 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; 
UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = 
PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] @@ -241,69 +241,69 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 7) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: 
[[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>), align 8, addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -314,94 +314,94 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[PRED_COPY11]](<2 x s16>) ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) - ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 
; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX81-NEXT: {{ $}} - ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX81-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX81-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX81-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX81-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX81-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX81-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX81-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX81-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX81-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX81-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX81-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v4f16 ; GFX9: bb.1 (%ir-block.0): ; 
GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX9-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX9-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX9-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX9-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX9-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX9-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX9-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX9-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; 
GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY $sgpr2 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY $sgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY $sgpr4 + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY $sgpr5 + ; GFX10-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY $sgpr6 + ; GFX10-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY $sgpr7 + ; GFX10-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY $sgpr8 + ; GFX10-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GFX10-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY $vgpr0 + ; GFX10-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY $vgpr1 + ; GFX10-NEXT: [[PRED_COPY10:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[PRED_COPY11:%[0-9]+]]:_(<2 x s16>) = PRED_COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY10]](<2 x s16>), [[PRED_COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE_D16 intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 2ce09043b15b6..c3b1c196e0091 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -2470,17 +2470,17 @@ body: | ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2582,17 +2582,17 @@ body: | ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4783,8 +4783,8 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4826,8 +4826,8 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4869,8 +4869,8 @@ body: | ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4957,8 +4957,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5034,8 +5034,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5111,8 +5111,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -5294,8 +5294,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; 
CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5328,8 +5328,8 @@ body: | ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5407,8 +5407,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5441,8 +5441,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5520,8 +5520,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5554,8 +5554,8 @@ body: | ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; 
GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5704,8 +5704,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5738,8 +5738,8 @@ body: | ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -5772,8 +5772,8 @@ body: | ; CI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; CI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; CI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; CI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -5849,8 +5849,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -5883,8 +5883,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: 
[[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -5917,8 +5917,8 @@ body: | ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -5994,8 +5994,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -6028,8 +6028,8 @@ body: | ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -6062,8 +6062,8 @@ body: | ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -7254,10 +7254,10 @@ body: | ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: 
[[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7356,10 +7356,10 @@ body: | ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7458,10 +7458,10 @@ body: | ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7524,10 +7524,10 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7576,10 +7576,10 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST 
[[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7628,10 +7628,10 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7656,10 +7656,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7670,10 +7670,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7684,10 +7684,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD 
[[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -7712,10 +7712,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7726,10 +7726,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7740,10 +7740,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, 
addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index ca121f6ed12c8..bf38f29940d08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -2308,17 +2308,17 @@ body: | ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2420,17 +2420,17 @@ body: | ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4200,8 +4200,8 @@ body: | ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR 
[[SHL5]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4243,8 +4243,8 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4286,8 +4286,8 @@ body: | ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4374,8 +4374,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4451,8 +4451,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4528,8 +4528,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: 
[[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -4720,8 +4720,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -4754,8 +4754,8 @@ body: | ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -4833,8 +4833,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -4867,8 +4867,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -4946,8 +4946,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + 
; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -4980,8 +4980,8 @@ body: | ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -5154,8 +5154,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 @@ -5189,8 +5189,8 @@ body: | ; CI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; CI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) @@ -5222,8 +5222,8 @@ body: | ; CI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; CI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; CI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; CI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) @@ -5300,8 +5300,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 @@ -5335,8 +5335,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; VI-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) @@ -5368,8 +5368,8 @@ body: | ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) @@ -5446,8 +5446,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 @@ -5481,8 +5481,8 @@ body: | ; GFX9-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; GFX9-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from 
unknown-address + 24) @@ -5514,8 +5514,8 @@ body: | ; GFX9-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index fcbcfddef1580..103a818e063f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -3764,17 +3764,17 @@ body: | ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3795,17 +3795,17 @@ body: | ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; 
CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3826,17 +3826,17 @@ body: | ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -3968,17 +3968,17 @@ body: | ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4004,17 +4004,17 @@ body: | ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-HSA-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-HSA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -4045,17 +4045,17 @@ body: | ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -10454,8 +10454,8 @@ body: | ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10503,8 +10503,8 @@ body: | ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; 
CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10546,8 +10546,8 @@ body: | ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10595,8 +10595,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) ; GFX9-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; GFX9-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10683,8 +10683,8 @@ body: | ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10766,8 +10766,8 @@ body: | ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10843,8 +10843,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR 
[[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -10926,8 +10926,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -11231,8 +11231,8 @@ body: | ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11265,8 +11265,8 @@ body: | ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11357,8 +11357,8 @@ body: | ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[COPY]], [[C7]](s64) @@ -11391,8 +11391,8 @@ body: | ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11470,8 +11470,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11504,8 +11504,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11596,8 +11596,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11630,8 +11630,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = 
G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) @@ -11816,8 +11816,8 @@ body: | ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -11850,8 +11850,8 @@ body: | ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; SI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -11884,8 +11884,8 @@ body: | ; SI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; SI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; SI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -11967,8 +11967,8 @@ body: | ; CI-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-MESA-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12001,8 +12001,8 @@ body: | ; CI-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; CI-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; CI-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: 
[[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12035,8 +12035,8 @@ body: | ; CI-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; CI-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; CI-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; CI-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; CI-MESA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -12112,8 +12112,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12146,8 +12146,8 @@ body: | ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; VI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12180,8 +12180,8 @@ body: | ; VI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; VI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -12263,8 +12263,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-MESA-NEXT: 
[[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-MESA-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-MESA-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) @@ -12297,8 +12297,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] ; GFX9-MESA-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[PRED_COPY1]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX9-MESA-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9-MESA-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) @@ -12331,8 +12331,8 @@ body: | ; GFX9-MESA-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) ; GFX9-MESA-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] ; GFX9-MESA-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) - ; GFX9-MESA-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX9-MESA-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-MESA-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[PRED_COPY2]](s32) ; GFX9-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) @@ -14190,10 +14190,10 @@ body: | ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14204,10 +14204,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14306,10 +14306,10 @@ body: | ; CI-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14408,10 +14408,10 @@ body: | ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14422,10 +14422,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14524,10 +14524,10 @@ body: | ; GFX9-MESA-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), 
[[OR17]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14590,10 +14590,10 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14604,10 +14604,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14656,10 +14656,10 @@ body: | ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14708,10 
+14708,10 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14722,10 +14722,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14774,10 +14774,10 @@ body: | ; GFX9-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14811,10 +14811,10 @@ body: | ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; 
SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14825,10 +14825,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14839,10 +14839,10 @@ body: | ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14853,10 +14853,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14867,10 +14867,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14881,10 +14881,10 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 @@ -14916,10 +14916,10 @@ body: | ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14930,10 +14930,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14944,10 +14944,10 @@ body: | ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x 
s32>) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_global_v2s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14958,10 +14958,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14972,10 +14972,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-HSA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14986,10 +14986,10 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-MESA-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p1) = COPY 
$vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 46a6125225dfb..873a8e6d5cb51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -6783,17 +6783,17 @@ body: | ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -6814,17 +6814,17 @@ body: | ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -6845,17 +6845,17 @@ body: | ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR]], [[C3]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -7102,17 +7102,17 @@ body: | ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -7143,17 +7143,17 @@ body: | ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; 
CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -7184,17 +7184,17 @@ body: | ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-DS128-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-DS128-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -13007,8 +13007,8 @@ body: | ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13083,8 +13083,8 @@ body: | ; CI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13159,8 +13159,8 @@ body: | ; CI-DS128-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; CI-DS128-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; CI-DS128-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CI-DS128-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL 
[[ANYEXT1]], [[PRED_COPY]](s32) ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13235,8 +13235,8 @@ body: | ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13311,8 +13311,8 @@ body: | ; GFX9-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX9-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13393,8 +13393,8 @@ body: | ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13418,8 +13418,8 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD3]](s32) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX10-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -13494,8 +13494,8 @@ body: | ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) ; GFX11-NEXT: 
[[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[PRED_COPY]](s32) ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) @@ -15117,10 +15117,10 @@ body: | ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15218,10 +15218,10 @@ body: | ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15319,10 +15319,10 @@ body: | ; CI-DS128-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15420,10 +15420,10 @@ body: | ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -15521,10 +15521,10 @@ body: | ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -15535,10 +15535,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -15636,10 +15636,10 @@ body: | ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ 
$}} @@ -15662,10 +15662,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -15763,10 +15763,10 @@ body: | ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15777,10 +15777,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -15843,10 +15843,10 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15895,10 +15895,10 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15947,10 +15947,10 @@ body: | ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15999,10 +15999,10 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16051,10 +16051,10 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16065,10 +16065,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16117,10 +16117,10 @@ body: | ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16143,10 +16143,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ 
-16195,10 +16195,10 @@ body: | ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16209,10 +16209,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -16246,10 +16246,10 @@ body: | ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16269,10 +16269,10 @@ body: | ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16295,10 +16295,10 @@ body: | ; CI-DS128-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16321,10 +16321,10 @@ body: | ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16347,10 +16347,10 @@ body: | ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16361,10 +16361,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; 
GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16387,10 +16387,10 @@ body: | ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16413,10 +16413,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16439,10 +16439,10 @@ body: | ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16453,10 +16453,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -16490,10 +16490,10 @@ body: | ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16513,10 +16513,10 @@ body: | ; CI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16534,10 +16534,10 @@ body: | ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-DS128-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-DS128-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_local_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16555,10 +16555,10 @@ body: | ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16576,10 +16576,10 @@ body: | ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16590,10 +16590,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX9-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16611,10 +16611,10 @@ body: | ; GFX10-NEXT: 
[[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16632,10 +16632,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_local_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16653,10 +16653,10 @@ body: | ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16667,10 +16667,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-UNALIGNED-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 620f012805294..49602d718d1a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4676,17 +4676,17 @@ body: | ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -4707,17 +4707,17 @@ body: | ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -4877,17 +4877,17 @@ 
body: | ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -4918,17 +4918,17 @@ body: | ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) @@ -11236,10 +11236,10 @@ body: | ; SI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11337,10 +11337,10 @@ 
body: | ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11438,10 +11438,10 @@ body: | ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11539,10 +11539,10 @@ body: | ; GFX9-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11640,10 +11640,10 @@ body: | ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11654,10 
+11654,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -11720,10 +11720,10 @@ body: | ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11772,10 +11772,10 @@ body: | ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11824,10 +11824,10 @@ body: | ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: liveins: $vgpr0 ; 
GFX9-NEXT: {{ $}} @@ -11876,10 +11876,10 @@ body: | ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11928,10 +11928,10 @@ body: | ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11942,10 +11942,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -11982,10 +11982,10 @@ body: | ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12008,10 +12008,10 @@ body: | ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12034,10 +12034,10 @@ body: | ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12060,10 +12060,10 @@ body: | ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12086,10 +12086,10 @@ body: | ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: 
$vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12100,10 +12100,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -12140,10 +12140,10 @@ body: | ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12166,10 +12166,10 @@ body: | ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; CI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12192,10 +12192,10 @@ body: | ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST 
[[BUILD_VECTOR1]](<3 x s32>) - ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX9-LABEL: name: test_load_private_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12218,10 +12218,10 @@ body: | ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX10-LABEL: name: test_load_private_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12244,10 +12244,10 @@ body: | ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) ; GFX11-LABEL: name: test_load_private_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12258,10 +12258,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) - ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s96) = PRED_COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PRED_COPY]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[PRED_COPY1]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir index 021cebbb6cb49..52ef976213d07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s8) = PRED_COPY [[TRUNC]](s8) ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (store (s8)) ; CHECK-NEXT: S_ENDPGM 0 %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index c1b3b758c22cf..5c04698435437 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -27,24 +27,24 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY8]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY9]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY10]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -95,8 +95,8 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C3]](s32) ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -119,10 +119,10 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 @@ -154,18 +154,18 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C6]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s8) = G_CONSTANT i8 0 @@ -246,29 +246,29 @@ body: | ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[C9]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C7]](s16) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY6]], [[C5]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[PRED_COPY7]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]] @@ -312,33 +312,33 @@ body: | ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]] ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; 
CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s16) = PRED_COPY [[C8]](s16) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY6]], [[C6]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[PRED_COPY7]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY9]], [[C10]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]] @@ -376,33 +376,33 @@ body: | ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C8]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C9]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], 
[[C10]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[C11]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C12]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C7]](s32) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C13]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32) %0:_(s4) = G_CONSTANT i4 0 @@ -430,29 +430,29 @@ body: | ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]] ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]] - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -494,44 +494,44 @@ body: | ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[PRED_COPY6]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s16) = 
PRED_COPY [[C9]](s16) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[PRED_COPY8]], [[C7]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[PRED_COPY9]](s32) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]] ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C6]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[PRED_COPY11]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -609,24 +609,24 @@ body: | ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]] ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[PRED_COPY4]](s32) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]] ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 @@ -667,315 +667,315 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C4]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY5]], [[C5]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY6]], [[C6]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY8]], [[C8]](s32) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY9]], [[C9]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY10]], [[C10]](s32) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY11]], [[C11]](s32) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY12]], [[C12]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32) + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY13]], [[C13]](s32) ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32) + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY14]], [[C14]](s32) ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32) + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY15]], [[C15]](s32) ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY16]], [[C16]](s32) ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY17]], [[C17]](s32) ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL 
[[PRED_COPY18]], [[C18]](s32) ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32) + ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY19]], [[C19]](s32) ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32) + ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY20]], [[C20]](s32) ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32) + ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY21]], [[C21]](s32) ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32) + ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY22]], [[C22]](s32) ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32) + ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY23]], [[C23]](s32) ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32) + ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY24]], [[C24]](s32) ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32) + ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY25]], [[C25]](s32) ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY26:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32) + ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY26]], [[C26]](s32) ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY27:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CHECK-NEXT: 
[[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32) + ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY27]], [[C27]](s32) ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY28:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32) + ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY28]], [[C28]](s32) ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY29:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32) + ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY29]], [[C29]](s32) ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY30:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32) + ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY30]], [[C30]](s32) ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY31:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32) + ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY31]], [[C31]](s32) ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32) - ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]] - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY32:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY33:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY33]], [[C1]](s32) + ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY32]], [[SHL31]] + ; CHECK-NEXT: [[PRED_COPY34:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY34]], [[C2]](s32) ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]] - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY35:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY35]], [[C3]](s32) ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]] - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY36:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY36]], [[C4]](s32) ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]] - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32) + ; 
CHECK-NEXT: [[PRED_COPY37:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY37]], [[C5]](s32) ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]] - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY38:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY38]], [[C6]](s32) ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]] - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY39:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY39]], [[C7]](s32) ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]] - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32) + ; CHECK-NEXT: [[PRED_COPY40:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY40]], [[C8]](s32) ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]] - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY41:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY41]], [[C9]](s32) ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]] - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY42:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY42]], [[C10]](s32) ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]] - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY43:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY43]], [[C11]](s32) ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]] - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32) + ; CHECK-NEXT: [[PRED_COPY44:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY44]], [[C12]](s32) ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]] - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32) + ; CHECK-NEXT: [[PRED_COPY45:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY45]], [[C13]](s32) ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]] - ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32) + ; CHECK-NEXT: [[PRED_COPY46:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY46]], [[C14]](s32) ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]] - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32) + ; CHECK-NEXT: [[PRED_COPY47:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: 
[[SHL45:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY47]], [[C15]](s32) ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]] - ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32) + ; CHECK-NEXT: [[PRED_COPY48:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY48]], [[C16]](s32) ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]] - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32) + ; CHECK-NEXT: [[PRED_COPY49:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY49]], [[C17]](s32) ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]] - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32) + ; CHECK-NEXT: [[PRED_COPY50:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY50]], [[C18]](s32) ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]] - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32) + ; CHECK-NEXT: [[PRED_COPY51:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY51]], [[C19]](s32) ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]] - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY52:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY52]], [[C20]](s32) ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]] - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32) + ; CHECK-NEXT: [[PRED_COPY53:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY53]], [[C21]](s32) ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]] - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32) + ; CHECK-NEXT: [[PRED_COPY54:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY54]], [[C22]](s32) ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]] - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32) + ; CHECK-NEXT: [[PRED_COPY55:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY55]], [[C23]](s32) ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]] - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32) + ; CHECK-NEXT: [[PRED_COPY56:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY56]], [[C24]](s32) ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]] - ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32) + ; CHECK-NEXT: [[PRED_COPY57:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY57]], [[C25]](s32) ; 
CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]] - ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32) + ; CHECK-NEXT: [[PRED_COPY58:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY58]], [[C26]](s32) ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]] - ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32) + ; CHECK-NEXT: [[PRED_COPY59:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY59]], [[C27]](s32) ; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]] - ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32) + ; CHECK-NEXT: [[PRED_COPY60:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY60]], [[C28]](s32) ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]] - ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32) + ; CHECK-NEXT: [[PRED_COPY61:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY61]], [[C29]](s32) ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]] - ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32) + ; CHECK-NEXT: [[PRED_COPY62:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY62]], [[C30]](s32) ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]] - ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32) + ; CHECK-NEXT: [[PRED_COPY63:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY63]], [[C31]](s32) ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]] - ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32) - ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]] - ; CHECK-NEXT: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY64:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY65:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY65]], [[C1]](s32) + ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY64]], [[SHL62]] + ; CHECK-NEXT: [[PRED_COPY66:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY66]], [[C2]](s32) ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]] - ; CHECK-NEXT: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY67:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY67]], [[C3]](s32) ; CHECK-NEXT: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]] - ; CHECK-NEXT: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32) + ; CHECK-NEXT: 
[[PRED_COPY68:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY68]], [[C4]](s32) ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]] - ; CHECK-NEXT: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32) + ; CHECK-NEXT: [[PRED_COPY69:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY69]], [[C5]](s32) ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]] - ; CHECK-NEXT: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32) + ; CHECK-NEXT: [[PRED_COPY70:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY70]], [[C6]](s32) ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]] - ; CHECK-NEXT: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32) + ; CHECK-NEXT: [[PRED_COPY71:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY71]], [[C7]](s32) ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]] - ; CHECK-NEXT: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32) + ; CHECK-NEXT: [[PRED_COPY72:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY72]], [[C8]](s32) ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]] - ; CHECK-NEXT: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32) + ; CHECK-NEXT: [[PRED_COPY73:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY73]], [[C9]](s32) ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]] - ; CHECK-NEXT: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32) + ; CHECK-NEXT: [[PRED_COPY74:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY74]], [[C10]](s32) ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]] - ; CHECK-NEXT: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32) + ; CHECK-NEXT: [[PRED_COPY75:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY75]], [[C11]](s32) ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]] - ; CHECK-NEXT: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32) + ; CHECK-NEXT: [[PRED_COPY76:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY76]], [[C12]](s32) ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]] - ; CHECK-NEXT: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32) + ; CHECK-NEXT: [[PRED_COPY77:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY77]], [[C13]](s32) ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]] - ; CHECK-NEXT: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32) + ; CHECK-NEXT: [[PRED_COPY78:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: 
[[SHL75:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY78]], [[C14]](s32) ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]] - ; CHECK-NEXT: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32) + ; CHECK-NEXT: [[PRED_COPY79:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY79]], [[C15]](s32) ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]] - ; CHECK-NEXT: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32) + ; CHECK-NEXT: [[PRED_COPY80:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY80]], [[C16]](s32) ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]] - ; CHECK-NEXT: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32) + ; CHECK-NEXT: [[PRED_COPY81:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY81]], [[C17]](s32) ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]] - ; CHECK-NEXT: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32) + ; CHECK-NEXT: [[PRED_COPY82:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY82]], [[C18]](s32) ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]] - ; CHECK-NEXT: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32) + ; CHECK-NEXT: [[PRED_COPY83:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY83]], [[C19]](s32) ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]] - ; CHECK-NEXT: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32) + ; CHECK-NEXT: [[PRED_COPY84:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY84]], [[C20]](s32) ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]] - ; CHECK-NEXT: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32) + ; CHECK-NEXT: [[PRED_COPY85:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY85]], [[C21]](s32) ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]] - ; CHECK-NEXT: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32) + ; CHECK-NEXT: [[PRED_COPY86:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY86]], [[C22]](s32) ; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]] - ; CHECK-NEXT: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32) + ; CHECK-NEXT: [[PRED_COPY87:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY87]], [[C23]](s32) ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]] - ; CHECK-NEXT: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32) + ; CHECK-NEXT: [[PRED_COPY88:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY88]], [[C24]](s32) ; 
CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]] - ; CHECK-NEXT: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32) + ; CHECK-NEXT: [[PRED_COPY89:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY89]], [[C25]](s32) ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]] - ; CHECK-NEXT: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32) + ; CHECK-NEXT: [[PRED_COPY90:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY90]], [[C26]](s32) ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]] - ; CHECK-NEXT: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32) + ; CHECK-NEXT: [[PRED_COPY91:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY91]], [[C27]](s32) ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]] - ; CHECK-NEXT: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32) + ; CHECK-NEXT: [[PRED_COPY92:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY92]], [[C28]](s32) ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]] - ; CHECK-NEXT: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32) + ; CHECK-NEXT: [[PRED_COPY93:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY93]], [[C29]](s32) ; CHECK-NEXT: [[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]] - ; CHECK-NEXT: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32) + ; CHECK-NEXT: [[PRED_COPY94:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY94]], [[C30]](s32) ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]] - ; CHECK-NEXT: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32) + ; CHECK-NEXT: [[PRED_COPY95:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY95]], [[C31]](s32) ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index dbc38d9e5f128..630b9d2fe4b14 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -459,8 +459,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -541,8 +541,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -561,10 +561,10 @@ body: | ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -572,10 +572,10 @@ body: | ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 7b439e501854c..fa67048e2e05f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -134,8 +134,8 @@ body: | ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[COPY2]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir index 44192599e4595..a6aa422d04e05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir @@ -21,33 +21,33 @@ body: | ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; GFX6-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -61,26 +61,26 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[PRED_COPY]] ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -89,10 +89,10 @@ body: | ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC2]] - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[TRUNC3]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16) ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] @@ -215,33 +215,33 @@ body: | ; GFX-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; GFX-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 ; GFX-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PRED_COPY]](s32) ; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[PRED_COPY]] ; GFX-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[PRED_COPY]] ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[PRED_COPY]] + ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[PRED_COPY]] ; GFX-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[PRED_COPY]] + ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[PRED_COPY]] ; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C5]](s32) ; GFX-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; GFX-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir index 09aaf8d548758..4cd01f75b3044 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -18,8 +18,8 @@ body: | ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] @@ -164,8 +164,8 @@ body: | ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] @@ -227,11 +227,11 @@ body: | ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] @@ -343,14 +343,14 @@ body: | ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY 
[[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[PRED_COPY2]] ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index a175a87f239e3..18e627aa9e1da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -190,10 +190,10 @@ body: | ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index 293fcb620fe70..97a566ec36cfd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -403,16 +403,16 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY1]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index d5f992a42b07a..3ed6004a9a7bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -1236,8 +1236,8 @@ body: | ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) @@ -1253,8 +1253,8 @@ body: | ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) @@ -1270,8 +1270,8 @@ body: | ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[UV1]](s128) ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[TRUNC1]], 1 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[COPY1]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 8bbde77b42697..cf7da440d252e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -36,10 +36,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -62,10 +62,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -88,10 +88,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -138,8 +138,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -162,9 +162,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[PRED_COPY]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -187,12 +187,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -215,10 +215,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -313,18 +313,18 @@ body: | ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) 
= G_AND [[COPY3]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index 68d8bf78e9d77..39aed517b5808 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -56,9 +56,9 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -97,12 +97,12 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 @@ -378,9 +378,9 @@ body: | ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -419,12 +419,12 @@ body: | ; 
GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 @@ -978,18 +978,18 @@ body: | ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST2]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -1007,14 +1007,14 @@ body: | ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY 
[[BITCAST2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) + ; GFX9-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[BITCAST]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) @@ -1044,13 +1044,13 @@ body: | ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] @@ -1066,14 +1066,14 @@ body: | ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) @@ -1099,13 +1099,13 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x 
s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -1120,14 +1120,14 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; GFX9-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 43496a8aec8c4..082f4c909b918 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -207,9 +207,9 @@ body: | ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index 896d057fd74f6..6a7acfe38772b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -18,8 +18,8 @@ body: | ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] @@ -164,8 +164,8 @@ body: | ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] @@ -227,11 +227,11 @@ body: | ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] @@ -343,14 +343,14 @@ body: | ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[PRED_COPY]] ; CHECK-NEXT: 
[[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[PRED_COPY2]] ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 7d6b8c5f62190..0f8d6b87cf032 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -190,10 +190,10 @@ body: | ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 053455f901ed6..7a890bcaaceb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -267,48 +267,48 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -327,48 +327,48 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 
2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -387,18 +387,18 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align1 @@ -406,29 +406,29 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; CI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) @@ -437,12 +437,12 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 @@ -529,22 +529,22 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -559,17 +559,17 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store 
(s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -600,12 +600,12 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -618,12 +618,12 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[COPY1]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -683,22 +683,22 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; 
SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -714,17 +714,17 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -756,12 +756,12 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 @@ -775,12 +775,12 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; 
VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -844,29 +844,29 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) @@ -893,9 +893,9 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; 
VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) @@ -903,7 +903,7 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -954,12 +954,12 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -986,12 +986,12 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -1050,44 +1050,44 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], 
[[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR 
[[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1102,23 +1102,23 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1128,13 +1128,13 @@ body: | ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ 
-1165,24 +1165,24 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1195,24 +1195,24 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; 
VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1342,44 +1342,44 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into 
unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1395,23 +1395,23 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), 
addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1421,13 +1421,13 @@ body: | ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -1459,24 +1459,24 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE 
[[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1490,24 +1490,24 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p0) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1637,44 +1637,44 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: 
(store (s8) into unknown-address + 7, addrspace 1) @@ -1690,23 +1690,23 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -1716,13 +1716,13 @@ body: | ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -1754,24 +1754,24 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - 
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1785,24 +1785,24 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[PTRTOINT]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[PTRTOINT]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: 
G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1932,39 +1932,39 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = 
G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -1980,17 +1980,17 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2001,13 +2001,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2039,19 +2039,19 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2065,19 +2065,19 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], 
[[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2208,40 +2208,40 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -2258,17 +2258,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2280,13 +2280,13 @@ body: | ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE 
[[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2319,20 +2319,20 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2347,20 +2347,20 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + 
; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[PTRTOINT1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2505,25 +2505,25 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -2769,56 +2769,56 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) 
= COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -2834,17 +2834,17 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -2855,13 +2855,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -2872,13 +2872,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -2910,26 +2910,26 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -2943,26 +2943,26 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = 
PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -3107,73 +3107,73 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: 
[[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = 
G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3189,17 +3189,17 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -3210,13 +3210,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; 
VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -3227,13 +3227,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -3244,13 +3244,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -3282,33 +3282,33 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3322,33 +3322,33 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3478,82 +3478,82 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = 
COPY [[UV]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY2]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY3]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = 
COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY5]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY8]], [[COPY9]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY6]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY8]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY6]](s64) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) - ; SI-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C5]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3569,23 +3569,23 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -3595,13 +3595,13 @@ body: | ; VI-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) @@ -3612,18 +3612,18 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC6]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC6]](s32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s64) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s64) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) @@ -3633,13 +3633,13 @@ body: | ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC9]](s32) - ; VI-NEXT: 
[[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC9]](s32) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s64) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR12]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) @@ -3671,42 +3671,42 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], 
[[COPY6]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3720,42 +3720,42 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s64) = PRED_COPY [[UV]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY]](s64) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC1]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY [[UV1]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[COPY5]], [[COPY6]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s64) = PRED_COPY [[UV1]](s64) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[PRED_COPY3]], [[PRED_COPY4]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[TRUNC2]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C2]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[PRED_COPY3]](s64) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC2]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[TRUNC3]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC3]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C2]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3886,73 +3886,73 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], 
[[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), 
[[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -3970,17 +3970,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -3991,13 +3991,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4008,13 +4008,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: 
[[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4025,13 +4025,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -4065,33 +4065,33 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4107,33 +4107,33 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: 
[[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4277,73 +4277,73 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE 
[[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], 
[[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -4361,17 +4361,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + 
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -4382,13 +4382,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4399,13 +4399,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into 
unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4416,13 +4416,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -4456,33 +4456,33 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4498,33 +4498,33 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4668,56 +4668,56 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: 
[[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE 
[[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -4735,17 +4735,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -4756,13 +4756,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -4773,13 +4773,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], 
[[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -4813,26 +4813,26 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -4848,26 +4848,26 @@ body: | ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -5026,73 +5026,73 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = 
G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -5110,17 +5110,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -5131,13 +5131,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -5148,13 +5148,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -5165,13 +5165,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -5205,33 +5205,33 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 
1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -5247,33 +5247,33 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE 
[[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -5416,90 +5416,90 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), 
[[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -5520,17 +5520,17 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -5541,13 +5541,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into 
unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -5558,13 +5558,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -5575,13 +5575,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -5592,13 +5592,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -5635,40 +5635,40 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; 
SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5687,40 +5687,40 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; 
VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5915,90 +5915,90 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: 
[[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = 
PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: 
[[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -6021,17 +6021,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -6042,13 +6042,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + 
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -6059,13 +6059,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -6076,13 +6076,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -6093,13 +6093,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -6138,40 +6138,40 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: 
[[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6192,40 +6192,40 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], 
[[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6655,90 +6655,90 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; 
SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: 
[[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: 
[[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -6761,17 +6761,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; 
VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -6782,13 +6782,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -6799,13 +6799,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -6816,13 +6816,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -6833,13 +6833,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -6878,40 +6878,40 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6932,40 +6932,40 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), 
[[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -7174,139 +7174,139 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store 
(s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE 
[[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: 
[[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = 
PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -7327,17 +7327,17 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -7348,13 +7348,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -7365,13 +7365,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -7382,13 +7382,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -7400,13 +7400,13 @@ body: | ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -7416,13 +7416,13 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -7432,13 +7432,13 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: 
[[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -7448,13 +7448,13 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -7491,59 +7491,59 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; 
SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), 
[[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7562,59 +7562,59 @@ body: | ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), 
[[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store 
(s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7853,139 +7853,139 @@ body: | ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = 
PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: 
[[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -8008,17 +8008,17 @@ body: | ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -8029,13 +8029,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -8046,13 +8046,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE 
[[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -8063,13 +8063,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -8081,13 +8081,13 @@ body: | ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -8097,13 +8097,13 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY 
[[UV7]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -8113,13 +8113,13 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -8129,13 +8129,13 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), 
[[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -8174,59 +8174,59 @@ body: | ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: 
(store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -8247,59 +8247,59 @@ body: | ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), 
[[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -8604,155 +8604,155 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; 
SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY3]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY4]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: 
G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY10]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY5]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY6]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY7]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY13]](s32) + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY8]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY9]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], 
[[COPY14]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY10]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY11]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY16]](s32) + ; SI-NEXT: [[PRED_COPY12:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY11]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY12]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY15]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY11]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY13:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[PRED_COPY13]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY18]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY14:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY14]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY19]](s32) + ; SI-NEXT: [[PRED_COPY15:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY14]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[PRED_COPY15]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY18]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY14]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) 
into unknown-address + 21, addrspace 1) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY16:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY20]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[PRED_COPY16]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY17:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY17]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY22]](s32) + ; SI-NEXT: [[PRED_COPY18:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY17]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[PRED_COPY18]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY21]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY17]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) - ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY19:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY23]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[PRED_COPY19]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY24]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY20:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY20]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; SI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY25]](s32) + ; SI-NEXT: [[PRED_COPY21:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY20]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[PRED_COPY21]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY24]](s32), 
[[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY20]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) - ; SI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY22:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY26]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[PRED_COPY22]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY23:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY23]], [[C]](s32) ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY28]](s32) + ; SI-NEXT: [[PRED_COPY24:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY23]], [[C3]] + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[PRED_COPY24]](s32) ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY27]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY23]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) - ; SI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY25:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR24]], [[C3]] - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY29]](s32) + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[PRED_COPY25]](s32) ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) @@ -8785,17 +8785,17 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -8806,13 +8806,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -8823,13 +8823,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY2]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; VI-NEXT: G_STORE [[ANYEXT4]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 
9, addrspace 1) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) @@ -8840,13 +8840,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY3]](s32) ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) ; VI-NEXT: G_STORE [[ANYEXT6]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) @@ -8857,13 +8857,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY4]](s32) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT8]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) @@ -8873,13 +8873,13 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC 
[[PRED_COPY5]](s32) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) ; VI-NEXT: G_STORE [[ANYEXT10]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) @@ -8889,13 +8889,13 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY6]](s32) ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) ; VI-NEXT: G_STORE [[ANYEXT12]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) @@ -8905,13 +8905,13 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY7]](s32) ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) ; VI-NEXT: G_STORE [[ANYEXT14]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) @@ -8922,13 +8922,13 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 
1) ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) - ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[PRED_COPY8]](s32) ; VI-NEXT: [[LSHR25:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC16]], [[C2]](s16) ; VI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; VI-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR25]](s16) ; VI-NEXT: G_STORE [[ANYEXT16]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; VI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR24]](s32) @@ -8980,65 +8980,65 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; SI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: 
[[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 @@ -9069,65 +9069,65 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[UV1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: 
(store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[UV2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY2]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[UV3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY3]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[UV4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY4]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[UV5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY5]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[UV6]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY6]], 
[[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY6]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[UV7]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY11]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY7]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) + ; VI-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[UV8]](s32) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY8]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 5f36761385683..eec9b6cfcace0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -325,12 +325,12 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -338,12 +338,12 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 @@ -362,22 +362,22 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C3]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -387,9 +387,9 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[TRUNC]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) @@ -397,7 
+397,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -635,10 +635,10 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) @@ -784,17 +784,17 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -802,16 +802,16 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY2]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY2]], [[C5]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) ; SI-NEXT: 
[[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align1 @@ -840,14 +840,14 @@ body: | ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C1]](s16) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) @@ -875,10 +875,10 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -892,11 +892,11 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -924,11 +924,11 @@ body: | ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -954,10 +954,10 @@ body: | ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) @@ -971,11 +971,11 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY1]], [[C3]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 @@ -1003,11 +1003,11 @@ body: | ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY 
$vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -1042,20 +1042,20 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[PRED_COPY]], [[C5]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY1]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1079,8 +1079,8 @@ body: | ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 @@ -1094,7 +1094,7 @@ body: | ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR3]], [[C6]](s16) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) @@ -1136,11 +1136,11 @@ body: | ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: 
[[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 @@ -1162,11 +1162,11 @@ body: | ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[OR2]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PRED_COPY]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index 869cccad2b5f5..ceeb3ac72789c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -150,10 +150,10 @@ body: | ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index 08ee4e1bba387..2f1e0d687364c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -218,13 +218,13 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[C1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -290,16 +290,16 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -344,16 +344,16 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -398,36 +398,36 @@ body: 
| ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[PRED_COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[PRED_COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:_(s32) = PRED_COPY [[C1]](s32) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[PRED_COPY6]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:_(s32) = PRED_COPY [[C3]](s32) ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[PRED_COPY7]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[PRED_COPY8]](s32) ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; 
CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index a8a3ab3e79efb..582f4de5eca99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -177,9 +177,9 @@ body: | ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 1cfde3549f28f..6e34658d6d180 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -143,10 +143,10 @@ body: | ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C]](s32) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[PRED_COPY]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index eb60ac64ee294..928c1d023716d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -459,8 +459,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -540,8 +540,8 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -560,10 +560,10 @@ body: | ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY1]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -571,10 +571,10 @@ body: | ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:_(s32) = PRED_COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 98b210cbcdaad..c1d5d3b935bb3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -672,13 +672,13 @@ body: | ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s16) = PRED_COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[COPY1]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[PRED_COPY]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) @@ -727,8 +727,8 @@ body: | ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:_(s32) = PRED_COPY [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[PRED_COPY]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) @@ -740,11 +740,11 @@ body: | ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:_(s64) = PRED_COPY [[MV1]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:_(s64) = PRED_COPY [[C]](s64) ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV]](s64), 0 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[PRED_COPY1]], [[PRED_COPY2]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] @@ -762,7 +762,7 @@ body: | ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL5]] ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) @@ -770,10 +770,10 @@ body: | ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL5]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:_(s32) = PRED_COPY [[OR7]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[PRED_COPY3]](s32), [[UV4]](s32) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL1]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL1]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[PRED_COPY]], [[SHL1]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND5]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll index 0121d27138a72..a12f447081b7a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.dispatch.id() #1 @@ -17,3 +17,6 @@ define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 { attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll index 38505220392b0..83468c6cc6604 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Error on non-HSA target @@ -16,3 +16,6 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) { declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index 8a11ff509d2a2..3b4eff62ce0f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -29,24 +29,24 @@ define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; 
GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -73,24 +73,24 @@ define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; 
GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -116,22 +116,22 @@ define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY2]], [[PRED_COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -156,22 +156,22 @@ define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn ; GFX9-MIR: 
bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $sgpr2, $sgpr3 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX9-MIR-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; GFX9-MIR-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY3]], [[PRED_COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -197,20 +197,20 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -235,20 +235,20 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = 
S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -274,18 +274,18 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -310,18 +310,18 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -347,20 +347,20 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: bb.1 (%ir-block.0): ; GFX8-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX8-MIR-NEXT: {{ $}} - ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR-NEXT: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX9-MIR: bb.1 (%ir-block.0): ; GFX9-MIR-NEXT: liveins: $vgpr0, $vgpr1 ; GFX9-MIR-NEXT: {{ $}} - ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX9-MIR-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[PRED_COPY]], [[PRED_COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: $vgpr0 = PRED_COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index 6d5c10369cbd3..2460b284872de 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -16,37 +16,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpswap_i32_1d ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit 
$exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 - ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 + ; GFX10-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource") + %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32), addrspace 7) $vgpr0 = COPY %3(s32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -64,31 +64,31 @@ body: | ; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr2 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource") + %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32), addrspace 7) S_ENDPGM 0 ... 
@@ -105,37 +105,37 @@ body: | ; GFX6-LABEL: name: atomic_cmpswap_i64_1d ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 - ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 + ; GFX6-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 - ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 + ; GFX8-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 
0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX10-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 + ; GFX10-NEXT: $vgpr0_vgpr1 = PRED_COPY [[PRED_COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource") + %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64), addrspace 7) $vgpr0_vgpr1 = COPY %3(s64) SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ... @@ -153,30 +153,30 @@ body: | ; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_256 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[PRED_COPY1]], [[PRED_COPY2]], [[PRED_COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource") + %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64), addrspace 7) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 34b07a1d48e5d..22be5a115088b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -9,7 +9,8 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { ; CI-LABEL: is_private_vgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: s_load_dword s2, s[4:5], 0x32 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -18,9 +19,7 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x11 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; CI-NEXT: flat_store_dword v[0:1], v0 ; CI-NEXT: s_endpgm @@ -81,9 +80,9 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { ; CI-LABEL: is_private_sgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x11 +; CI-NEXT: s_load_dword s0, s[4:5], 0x32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cbranch_scc1 .LBB1_2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 8aee6d71e20ce..1846de2560636 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -9,7 +9,8 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { ; CI-LABEL: is_local_vgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CI-NEXT: s_load_dword s2, s[4:5], 0x33 ; CI-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -18,9 +19,7 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x10 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1 ; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; CI-NEXT: flat_store_dword v[0:1], v0 ; CI-NEXT: s_endpgm @@ -81,9 +80,9 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { ; CI-LABEL: is_local_sgpr: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_load_dword s0, s[4:5], 0x10 +; CI-NEXT: s_load_dword s0, s[4:5], 0x33 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_cmp_lg_u32 s1, s0 ; CI-NEXT: s_cbranch_scc1 .LBB1_2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll index ae04ab5ca9a71..b6338b6d18fcf 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,ALL %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s @@ -129,3 +129,6 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" } attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll index 4f51c551edf30..4ed139a0da260 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Error on non-hsa target @@ -16,3 +16,6 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) { declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index 4cf8dfe643e96..951a054adb7dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -7,16 +7,16 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -28,16 +28,16 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -49,21 +49,21 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: 
{{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i64 %ret to <2 x float> @@ -75,17 +75,17 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -98,46 +98,46 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], 
[[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -147,7 +147,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -161,46 +161,46 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -220,16 +220,16 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -243,16 +243,16 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) %cast = bitcast i32 %ret to float diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index 57261a41d84ae..12c5b07b015c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -7,19 +7,19 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -32,17 +32,17 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; 
CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -55,50 +55,50 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[PRED_COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -108,7 +108,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY19]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -122,49 +122,49 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 
4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[PRED_COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -184,19 +184,19 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll index 6f1554ffb6172..7136d9e439038 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll @@ -4,7 +4,7 @@ declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) -; GFX908: LLVM ERROR: cannot select: %24:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %28:vgpr, %14:sgpr(<4 x s32>), %29:vgpr(s32), %30:vgpr, %27:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 4) (in function: buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 7) (in function: buffer_atomic_add_f32_rtn) ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn: ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}], s{{[0-9]+}} offen glc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index 7794e80c3c30d..45894d3b304db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -8,29 +8,29 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 
; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -41,29 +41,29 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: 
(volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -75,27 +75,27 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], 
%subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -107,27 +107,27 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], 
%subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -140,46 +140,46 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], 
%subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -195,46 +195,46 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; 
GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -256,44 +256,44 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; 
GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -309,44 +309,44 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A-NEXT: 
successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: 
[[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -366,29 +366,29 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -401,29 +401,29 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], 
%subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -434,29 +434,29 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 
ret void @@ -467,27 +467,27 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0 ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 
0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index cae772b032424..07b71598f42b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -8,29 +8,29 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + 
; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -41,40 +41,40 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY10]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -91,53 +91,53 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; 
PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; 
UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -150,44 +150,44 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -197,51 +197,51 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -251,7 +251,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: 
$vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -262,53 +262,53 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add_4095 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: 
[[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %voffset = add i32 %voffset.base, 4095 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index 5ab03c10cbd14..d076ead5370a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -7,15 +7,15 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN 
[[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -26,18 +26,18 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -48,20 +48,20 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -72,22 +72,22 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: 
$vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -100,44 +100,44 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: 
successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -147,7 +147,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -158,22 +158,22 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %voffset = add i32 %voffset.base, 4095 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index 4cc3e555c7d28..0a7b608ba8025 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -8,15 +8,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -28,16 +28,16 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -50,40 +50,40 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ 
$}} @@ -93,7 +93,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -106,43 +106,43 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -152,7 +152,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -164,15 +164,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret float %val @@ -184,15 +184,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], 
[[PRED_COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret float %val @@ -204,15 +204,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret float %val @@ -224,15 +224,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret float %val @@ -244,15 +244,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret float %val @@ -264,15 +264,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret float %val @@ -284,18 +284,18 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY 
[[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -306,20 +306,20 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -330,22 +330,22 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -356,15 +356,15 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -375,15 +375,15 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -400,18 +400,18 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -422,15 +422,15 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN 
[[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = zext i8 %val to i32 @@ -443,16 +443,16 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = sext i8 %val to i32 @@ -467,40 +467,40 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: 
{{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -510,7 +510,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -523,40 +523,40 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; 
CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -566,7 +566,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) %zext = zext i8 %val to i32 @@ -579,14 +579,14 @@ define amdgpu_ps float 
@raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[PRED_COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %val @@ -597,14 +597,14 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[PRED_COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret float %val @@ -615,16 +615,16 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret float %val @@ -635,15 +635,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 16 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -655,15 +655,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -675,18 +675,18 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4096 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -698,15 +698,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 
%voffset, i32 4095, i32 0) ret float %val @@ -717,15 +717,15 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret float %val @@ -736,17 +736,17 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 16 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -758,17 +758,17 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -780,17 +780,17 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -804,42 +804,42 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; 
CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; 
CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -849,7 +849,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 5000 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -863,43 +863,43 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE1]], [[PRED_COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -909,7 +909,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 5000 %val = call float 
@llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index 93ce8bdacf9e7..fcbf45af4528a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -7,29 +7,29 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -40,27 +40,27 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -71,33 +71,33 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: 
raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -108,38 +108,38 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; 
UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -152,48 +152,48 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -209,43 +209,43 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -265,33 +265,33 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -302,33 +302,33 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY6]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -339,33 +339,33 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -377,33 +377,33 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 
0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -415,39 +415,39 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: %11:vgpr_32, dead %24:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY4]], %11, [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -462,51 +462,51 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: %13:vgpr_32, dead %54:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY4]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY10]], [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], 
implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -522,46 +522,46 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], 
%subreg.sub1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: %13:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - 
; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index d27818afd62bd..d563c2f373eec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -7,15 +7,15 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -26,14 +26,14 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -44,17 +44,17 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], 
[[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -65,18 +65,18 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: 
BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -87,19 +87,19 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -111,45 +111,45 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE2]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -169,17 +169,17 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, 
[[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -190,17 +190,17 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -211,17 +211,17 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -233,17 +233,17 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -255,20 +255,20 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY8]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -283,48 +283,48 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr7 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr8 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %15:vgpr_32, dead %40:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY8]], [[PRED_COPY10]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; 
CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE2]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 939b932f5b277..c891f4ad91fa8 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -8,15 +8,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -28,17 +28,17 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, 
[[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -51,41 +51,41 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -107,28 +107,28 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]] - ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[V_CMP_EQ_U32_e64_]] + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[PRED_COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -150,44 +150,44 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], 
%subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; 
CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -207,15 +207,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret void @@ -226,15 +226,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -245,15 +245,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) ret void @@ -264,15 +264,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret void @@ -283,15 +283,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret void @@ -302,15 +302,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret void @@ -321,15 +321,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret void @@ -340,17 +340,17 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -361,18 +361,18 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -383,19 +383,19 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY8]], [[REG_SEQUENCE]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -406,15 +406,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_BYTE_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -426,15 +426,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -446,15 +446,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_SHORT_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half 
%val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -465,15 +465,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -484,17 +484,17 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -506,43 +506,43 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: 
[[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE2]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -562,14 +562,14 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = 
COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET_exact [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -580,16 +580,16 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret void @@ -600,15 +600,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, 
$sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -620,15 +620,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec 
:: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -640,18 +640,18 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], %11, [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -663,15 +663,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; 
CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -682,15 +682,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -701,15 +701,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -721,15 +721,15 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -741,18 +741,18 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: 
{{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], %11, [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -766,44 +766,44 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: %14:vgpr_32, dead %38:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; 
CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], %14, [[REG_SEQUENCE1]], [[PRED_COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -826,42 +826,42 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[PRED_COPY4]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll index 9fe1fbd91a2a2..39f5d2a333be0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -7,29 +7,29 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable 
load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -40,40 +40,40 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 
= V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY10]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x half> %val @@ -90,53 +90,53 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY6]], [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY12]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: 
[[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY15]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY6]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY7]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -148,44 +148,44 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -195,51 +195,51 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY 
[[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; 
PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -249,7 +249,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -260,29 +260,29 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 
1, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret half %val @@ -293,29 +293,29 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret half %val @@ -326,29 +326,29 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, 
[[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret half %val @@ -359,29 +359,29 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit 
$vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret half %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index 1ffbf31ae52b9..f182a3205b265 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -7,15 +7,15 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -26,18 +26,18 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x float> %val @@ -48,20 +48,20 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: 
bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <3 x float> %val @@ -72,22 +72,22 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY6]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -99,44 +99,44 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = 
V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY5]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -146,7 +146,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -157,15 +157,15 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = 
TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret float %val @@ -176,15 +176,15 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret float %val @@ -195,15 +195,15 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret float %val @@ -214,15 +214,15 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index ce4e7fd595192..2793d801777b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -7,29 +7,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: 
(dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -40,33 +40,33 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY7]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -83,38 +83,38 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], 
%subreg.sub3 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -127,41 +127,41 @@ define amdgpu_ps void 
@raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -177,41 +177,41 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; 
PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], 
[[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -233,44 +233,44 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -286,44 +286,44 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -345,45 +345,45 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -399,45 +399,45 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = 
COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], 
[[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -457,29 +457,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -490,29 +490,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -523,29 +523,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, 
addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -556,29 +556,29 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, 
[[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 9b4656b46dabd..1f946113450df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -7,29 +7,29 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = 
COPY $sgpr6 - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -42,41 +42,41 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 
= PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -92,41 +92,41 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -148,44 +148,44 @@ define amdgpu_ps void 
@raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -201,44 +201,44 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; 
PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -260,45 +260,45 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; 
UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 
[[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; UNPACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -314,45 +314,45 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit 
$exec + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; PACKED-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 
0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index bab3c26716b21..c5ac7dd2294a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -8,15 +8,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -28,17 +28,17 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact 
[[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY6]], [[REG_SEQUENCE1]], [[PRED_COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -50,18 +50,18 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; 
CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -73,19 +73,19 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -97,16 +97,16 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = 
COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -119,41 +119,41 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP 
%bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -175,44 +175,44 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: 
[[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -234,45 +234,45 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 
[[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY7]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -293,15 +293,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 
78, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -313,15 +313,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -333,15 +333,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -353,15 +353,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void @@ -374,14 +374,14 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0) ret void @@ -392,14 +392,14 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[PRED_COPY]], [[REG_SEQUENCE]], [[PRED_COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) ret void @@ -410,16 +410,16 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY6]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) ret void @@ -430,15 +430,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -450,15 +450,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -470,18 +470,18 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -493,15 +493,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) ret void @@ -512,15 +512,15 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) ret void @@ -531,17 +531,17 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -553,17 +553,17 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 
= COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -575,17 +575,17 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 
0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -599,43 +599,43 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY6]], [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -658,44 +658,44 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY 
$vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PRED_COPY5]], [[PRED_COPY7]], 0, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], 
[[COPY13]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY14]], [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[PRED_COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE1]], [[PRED_COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index d7e8d2a2730fe..40d613e14a12f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -11,46 +11,46 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 
+ ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val @@ -61,46 +61,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_glc ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: 
[[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_glc ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) ret i32 %val @@ -111,61 +111,61 @@ define amdgpu_ps <2 x i32> 
@s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX7-LABEL: name: s_buffer_load_v2i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant 
load (s64), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX8-LABEL: name: s_buffer_load_v2i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; 
GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val @@ -176,76 +176,76 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val @@ -256,133 +256,133 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; GFX6-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX6-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX6-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX6-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX6-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX7-LABEL: name: s_buffer_load_v8i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], 
implicit $exec + ; GFX7-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX7-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX7-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX7-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX7-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX8-LABEL: name: s_buffer_load_v8i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8-NEXT: 
[[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; GFX8-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec + ; GFX8-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: 
[[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY18]], implicit $exec + ; GFX8-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY19]], implicit $exec + ; GFX8-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY20]], implicit $exec + ; GFX8-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val @@ -393,229 +393,229 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX6-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX6-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX6-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX6-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX6-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec + ; GFX6-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; 
GFX6-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX6-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; GFX6-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX6-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX6-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX6-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX6-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX6-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX6-NEXT: $sgpr11 = PRED_COPY [[V_READFIRSTLANE_B32_11]] + ; GFX6-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX6-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX6-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX6-NEXT: $sgpr13 = PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX6-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX6-NEXT: $sgpr14 = PRED_COPY [[V_READFIRSTLANE_B32_14]] + ; GFX6-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX6-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX7-LABEL: name: s_buffer_load_v16i32 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX7-NEXT: 
[[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], 
implicit $exec - ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX7-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX7-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX7-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX7-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX7-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec + ; GFX7-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; GFX7-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX7-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; GFX7-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX7-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX7-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX7-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX7-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX7-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX7-NEXT: $sgpr11 = PRED_COPY [[V_READFIRSTLANE_B32_11]] + ; GFX7-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX7-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX7-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX7-NEXT: $sgpr13 = PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX7-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX7-NEXT: $sgpr14 = PRED_COPY 
[[V_READFIRSTLANE_B32_14]] + ; GFX7-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX7-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX8-LABEL: name: s_buffer_load_v16i32 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; 
GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[PRED_COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = 
PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_32 = PRED_COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX8-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY21]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY22]], implicit $exec + ; GFX8-NEXT: $sgpr1 = PRED_COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY23]], implicit $exec + ; GFX8-NEXT: $sgpr2 = PRED_COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY24]], implicit $exec + ; GFX8-NEXT: $sgpr3 = PRED_COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY25]], implicit $exec + ; GFX8-NEXT: $sgpr4 = PRED_COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY26]], implicit $exec + ; GFX8-NEXT: $sgpr5 = PRED_COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[PRED_COPY27:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY27]], implicit $exec + ; GFX8-NEXT: $sgpr6 = PRED_COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY28]], implicit $exec + ; GFX8-NEXT: $sgpr7 = PRED_COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY29]], implicit $exec + ; 
GFX8-NEXT: $sgpr8 = PRED_COPY [[V_READFIRSTLANE_B32_8]] + ; GFX8-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY30]], implicit $exec + ; GFX8-NEXT: $sgpr9 = PRED_COPY [[V_READFIRSTLANE_B32_9]] + ; GFX8-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY31]], implicit $exec + ; GFX8-NEXT: $sgpr10 = PRED_COPY [[V_READFIRSTLANE_B32_10]] + ; GFX8-NEXT: [[PRED_COPY32:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY32]], implicit $exec + ; GFX8-NEXT: $sgpr11 = PRED_COPY [[V_READFIRSTLANE_B32_11]] + ; GFX8-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY33]], implicit $exec + ; GFX8-NEXT: $sgpr12 = PRED_COPY [[V_READFIRSTLANE_B32_12]] + ; GFX8-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY34]], implicit $exec + ; GFX8-NEXT: $sgpr13 = PRED_COPY [[V_READFIRSTLANE_B32_13]] + ; GFX8-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY35]], implicit $exec + ; GFX8-NEXT: $sgpr14 = PRED_COPY [[V_READFIRSTLANE_B32_14]] + ; GFX8-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY20]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY36]], implicit $exec + ; GFX8-NEXT: $sgpr15 = PRED_COPY [[V_READFIRSTLANE_B32_15]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val @@ -626,45 +626,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) ret i32 %val @@ -675,43 +675,43 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) ret i32 %val @@ -722,45 +722,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit 
$exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) ret i32 %val @@ -771,43 +771,43 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; 
GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) ret i32 %val @@ -818,43 +818,43 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: 
s_buffer_load_i32_offset_1020 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) ret i32 %val @@ -865,45 +865,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - 
; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) ret i32 %val @@ -914,44 +914,44 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ 
$}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) ret i32 %val @@ -962,45 +962,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) ret i32 %val @@ -1011,46 +1011,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) ret i32 %load @@ -1061,45 +1061,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) ret i32 %load @@ -1110,45 +1110,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) ret i32 %load @@ -1159,45 +1159,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) ret i32 %load @@ -1208,45 +1208,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; 
GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) ret i32 %load @@ -1257,45 +1257,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) ret i32 %load @@ -1306,45 +1306,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) ret i32 %load @@ -1355,45 +1355,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6: bb.1 (%ir-block.0): ; 
GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY 
$sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) ret i32 %load @@ -1404,45 +1404,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) ret i32 %load @@ -1453,44 +1453,44 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { 
; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) ret i32 %load @@ -1501,45 +1501,45 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; 
GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = PRED_COPY [[V_READFIRSTLANE_B32_]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) ret i32 %load @@ -1551,43 +1551,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1598,52 +1598,52 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; 
GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], 
%subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val @@ -1654,61 +1654,61 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val @@ -1719,64 +1719,64 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> 
@llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val @@ -1787,94 +1787,94 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 
- ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1885,148 +1885,148 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], 
%subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: 
$vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: 
[[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX8: bb.1 
(%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY 
[[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2037,43 +2037,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], 
[[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2085,43 +2085,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2133,43 +2133,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2182,94 +2182,94 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; 
GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: 
[[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; 
GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2282,94 +2282,94 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 
0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY 
[[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2381,148 +2381,148 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 
(s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY 
[[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 
(s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: 
[[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable 
invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: 
[[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2534,148 +2534,148 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr10 = COPY 
[[COPY15]] - ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 
= REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub15 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8-NEXT: 
[[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY6]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY7]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY8]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY9]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY10]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY11]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr8 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr9 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr10 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr11 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr12 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr13 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr14 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr15 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit 
$vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2688,39 +2688,39 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = 
COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2728,45 +2728,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ 
$}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], 
[[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2774,45 +2774,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2820,7 +2820,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2832,37 +2832,37 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; 
GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -2870,43 +2870,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -2914,43 +2914,43 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 
+ ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; 
GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -2958,7 +2958,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2971,41 +2971,41 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: 
[[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3013,47 +3013,47 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3061,47 +3061,47 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; 
GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3109,7 +3109,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3122,30 +3122,30 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3160,36 +3160,36 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - 
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3204,36 +3204,36 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], 
implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3248,7 +3248,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val @@ -3260,39 +3260,39 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3300,45 +3300,45 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = 
PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3346,36 +3346,36 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3390,7 +3390,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val @@ -3403,38 +3403,38 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, 
[[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit 
$exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3443,59 +3443,59 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3504,59 +3504,59 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; 
GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY 
[[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE1]], [[PRED_COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3565,22 +3565,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 
= COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3594,42 +3594,42 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3638,63 +3638,63 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 
[[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3703,63 +3703,63 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr1 = 
PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3768,22 +3768,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3795,42 +3795,42 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -3839,63 +3839,63 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX7-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -3904,63 +3904,63 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: 
[[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_ADD_I32_]] ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 
= PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY13]], [[PRED_COPY11]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY5]], [[REG_SEQUENCE1]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -3969,22 +3969,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8-NEXT: {{ $}} ; 
GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY20]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY21]] + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY20]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY21]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -3996,39 +3996,39 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; 
GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], 
[[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4037,60 +4037,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ 
$}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 
= PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4099,60 +4099,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: 
[[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4161,22 +4161,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: 
[[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4188,39 +4188,39 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4229,60 +4229,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; 
GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4291,60 +4291,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: 
name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY 
[[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4353,22 +4353,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: 
[[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4380,39 +4380,39 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.3: ; GFX6-NEXT: successors: %bb.4, %bb.2 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4421,60 +4421,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.3: ; GFX7-NEXT: successors: %bb.4, %bb.2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4483,60 +4483,60 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY20]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY12]], [[PRED_COPY10]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.3: ; GFX8-NEXT: successors: %bb.4, %bb.2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE1]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4545,22 +4545,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY19]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY20]] + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY19]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY 
[[PRED_COPY20]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -4572,30 +4572,30 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.2: - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX6-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX6-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX6-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX6-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6-NEXT: {{ $}} @@ -4612,51 +4612,51 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: bb.5: ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX6-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX6-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX6-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX6-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX6-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX6-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX6-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX6-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX6-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX6-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX6-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX6-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX6-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX6-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX6-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX6-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY12]] + ; GFX6-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY13]] + ; GFX6-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY14]] + ; GFX6-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY15]] + ; GFX6-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY16]] + ; GFX6-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY17]] + ; GFX6-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY18]] + ; GFX6-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY19]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit 
$vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.2: - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX7-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX7-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY 
[[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX7-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7-NEXT: {{ $}} @@ -4673,51 +4673,51 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: bb.5: ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX7-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX7-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX7-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX7-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX7-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX7-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX7-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX7-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX7-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX7-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX7-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX7-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX7-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX7-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX7-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX7-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY12]] + ; GFX7-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY13]] + ; GFX7-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY14]] + ; GFX7-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY15]] + ; GFX7-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY16]] + ; GFX7-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY17]] + ; GFX7-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY18]] + ; GFX7-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY19]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.2: - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX8-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec - ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec + ; GFX8-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX8-NEXT: [[PRED_COPY11:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY10]], [[PRED_COPY8]], implicit $exec + ; GFX8-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY11]], [[PRED_COPY9]], implicit $exec ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8-NEXT: {{ $}} @@ -4734,22 +4734,22 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: bb.5: ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 - ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 - ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2 - ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub3 - ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub4 - ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub5 - ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub6 - ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub7 - ; GFX8-NEXT: $vgpr0 = COPY [[COPY12]] - ; GFX8-NEXT: $vgpr1 = COPY [[COPY13]] - ; GFX8-NEXT: $vgpr2 = COPY [[COPY14]] - ; GFX8-NEXT: $vgpr3 = COPY [[COPY15]] - ; GFX8-NEXT: $vgpr4 = COPY [[COPY16]] - ; GFX8-NEXT: $vgpr5 = COPY [[COPY17]] - ; GFX8-NEXT: $vgpr6 = COPY [[COPY18]] - ; GFX8-NEXT: $vgpr7 = COPY [[COPY19]] + ; GFX8-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub0 + ; GFX8-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub1 + ; GFX8-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub2 + ; GFX8-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub3 + ; GFX8-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub4 + ; GFX8-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub5 + ; GFX8-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub6 + ; GFX8-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE2]].sub7 + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY12]] + ; GFX8-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY13]] + ; GFX8-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY14]] + ; GFX8-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY15]] + ; GFX8-NEXT: $vgpr4 = PRED_COPY [[PRED_COPY16]] + ; GFX8-NEXT: $vgpr5 = PRED_COPY [[PRED_COPY17]] + ; GFX8-NEXT: $vgpr6 = PRED_COPY [[PRED_COPY18]] + ; GFX8-NEXT: $vgpr7 = PRED_COPY [[PRED_COPY19]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -4760,43 +4760,43 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4808,43 +4808,43 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4856,52 +4856,52 @@ define amdgpu_ps float 
@s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = 
V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 @@ -4914,52 +4914,52 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY6]], [[PRED_COPY4]], 0, implicit $exec ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 @@ -4973,49 +4973,49 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: 
[[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY 
$vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PRED_COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[PRED_COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v @@ -5028,52 +5028,52 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX6-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX6-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX6-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX7-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX7-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX7-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX8-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s32)) - ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY4]], [[PRED_COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll index 913b4091666ab..dc06738e189f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll @@ -6,9 +6,9 @@ define amdgpu_ps float @softwqm_f32(float %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[SOFT_WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SOFT_WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.softwqm.f32(float %val) ret float %ret @@ -19,9 +19,9 @@ define amdgpu_ps float @softwqm_v2f16(float %arg) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[SOFT_WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vgpr_32 = SOFT_WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[SOFT_WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.softwqm.v2f16(<2 x half> %val) @@ -34,14 +34,14 @@ define amdgpu_ps <2 x float> @softwqm_f64(double %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vreg_64 = SOFT_WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.softwqm.f64(double %val) 
%bitcast = bitcast double %ret to <2 x float> @@ -61,17 +61,17 @@ define amdgpu_ps <3 x float> @softwqm_v3f32(<3 x float> %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[SOFT_WQM:%[0-9]+]]:vreg_96 = SOFT_WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[SOFT_WQM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[SOFT_WQM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.softwqm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index 9fa37a2a68f81..c1ff26fcd5931 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -7,18 +7,18 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -30,18 +30,18 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, implicit $exec :: 
(volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -53,23 +53,23 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY9]] + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i64 %ret to <2 x float> @@ -81,19 +81,19 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -106,49 +106,49 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
$vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY 
[[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -158,7 +158,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -172,49 +172,49 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ 
; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -235,18 +235,18 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) %cast = bitcast i32 %ret to float diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index d5d2fc0ca831b..81f62921ef74f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -8,21 +8,21 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; 
CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -35,19 +35,19 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 
[[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -60,53 +60,53 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; 
CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY19]], [[PRED_COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY20]], [[PRED_COPY18]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: 
[[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY11]], %subreg.sub0, [[PRED_COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -116,7 +116,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[COPY21]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %cast = bitcast i32 %ret to float @@ -130,52 +130,52 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 
- ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY19]], [[COPY17]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY19]], [[PRED_COPY17]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY20]], [[PRED_COPY18]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY8]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY8]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY11]], %subreg.sub0, [[PRED_COPY12]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -195,21 +195,21 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[COPY]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll index 13d82003ec033..35feb107403a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll @@ -1,7 +1,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s ; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 -; GFX908: LLVM ERROR: cannot select: %29:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %40:vgpr, %15:sgpr(<4 x s32>), %41:vgpr(s32), %42:vgpr, %33:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (s32), align 1, addrspace 4) (in function: 
buffer_atomic_add_f32_rtn) +; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (s32), align 1, addrspace 7) (in function: buffer_atomic_add_f32_rtn) declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index 50d6074795f27..a3263c7c546eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -8,33 +8,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -45,33 +45,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; 
GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -83,29 +83,29 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; 
GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -117,29 +117,29 @@ 
define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], 
[[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -152,49 +152,49 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: 
$exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -210,49 +210,49 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX90A-NEXT: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def 
$scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -274,46 +274,46 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908-NEXT: successors: %bb.2(0x80000000) ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX908-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: ; GFX908-NEXT: successors: %bb.3(0x80000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX908-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX908-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX908-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX908-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX908-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.3: ; GFX908-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX908-NEXT: {{ $}} @@ -329,46 +329,46 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: successors: %bb.2(0x80000000) ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = 
REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; GFX90A-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; GFX90A-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; GFX90A-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; GFX90A-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; GFX90A-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX90A-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; GFX90A-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY7]], [[PRED_COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -389,33 +389,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -426,29 +426,29 @@ define amdgpu_ps void 
@struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], 
[[PRED_COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -459,33 +459,33 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: 
BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -496,29 +496,29 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): ; GFX90A-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; GFX90A-NEXT: {{ $}} - ; 
GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; GFX90A-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; GFX90A-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; GFX90A-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; GFX90A-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY]], [[PRED_COPY5]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 7) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index 19bff0f4d614d..5aec9275d09f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -7,33 +7,33 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 
%soffset, i32 0) ret half %val @@ -44,44 +44,44 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = 
S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY11]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY11]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -98,57 +98,57 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = 
V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY13]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY13]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY10]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY10]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY16]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY8]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -161,47 +161,47 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ 
$}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; 
UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -211,75 +211,75 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY17]], [[PRED_COPY21]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY18]], [[PRED_COPY22]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; 
UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec - ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec - ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY19]], [[PRED_COPY24]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY20]], [[PRED_COPY25]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: 
[[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -289,10 +289,10 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] + ; PACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY17]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -303,33 +303,33 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095 ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY 
$sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -341,33 +341,33 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
$sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %fval = bitcast i16 %val to half diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index 54bd85b95f2a7..b74ce7118a691 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -6,17 +6,17 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; 
CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -27,20 +27,20 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = 
BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -51,22 +51,22 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; 
CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -77,24 +77,24 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; 
CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -107,47 +107,47 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], 
implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -157,14 +157,14 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; 
CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY17]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY18]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY19]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -175,17 +175,17 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], 
[[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -197,17 +197,17 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %fval = bitcast i32 %val to float diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 9623fa14ecceb..679e59e7a7bab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -7,17 +7,17 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -29,20 +29,20 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = 
PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x float> %val @@ -54,22 +54,22 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <3 x float> %val @@ -81,24 +81,24 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 
0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x float> %val @@ -110,18 +110,18 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -133,17 +133,17 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -155,17 +155,17 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0) ret float %val @@ -178,47 +178,47 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN 
[[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -228,7 +228,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -239,17 +239,17 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = zext 
i8 %val to i32 @@ -262,18 +262,18 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = sext i8 %val to i32 @@ -286,17 +286,17 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 
0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = zext i16 %val to i32 @@ -309,18 +309,18 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = 
V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec - ; CHECK-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[V_BFE_I32_e64_]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) %ext = sext i16 %val to i32 @@ -334,17 +334,17 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret half %val @@ -356,17 +356,17 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = 
COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %val @@ -384,20 +384,20 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + 
; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <4 x half> %val @@ -409,17 +409,17 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 5f0087912add3..60f5bcab020fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -7,33 +7,33 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE 
[[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -44,37 +44,37 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, 
[[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY8]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -91,42 +91,42 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY9]], [[PRED_COPY]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[PRED_COPY10]], [[PRED_COPY1]], implicit $exec + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[PRED_COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; 
PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -138,49 +138,49 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: 
[[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -196,49 +196,49 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], 
%subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; PACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -258,33 +258,33 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; UNPACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; UNPACKED-NEXT: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; PACKED-NEXT: 
BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; PACKED-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 31b8977e1513f..f566e2b3f0e49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -6,17 +6,17 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -27,19 +27,19 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,20 +50,20 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: 
BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -74,21 +74,21 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -100,49 +100,49 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY6]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY18]], [[COPY16]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY17]], [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY18]], [[PRED_COPY16]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY7]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY7]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = 
S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]], %subreg.sub0, [[PRED_COPY10]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -162,17 +162,17 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll index dd5265a7b68e4..c0efbcb663fde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -7,17 +7,17 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -28,19 +28,19 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -51,20 +51,20 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], 
%subreg.sub1, [[PRED_COPY2]], %subreg.sub2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY5]], %subreg.sub2, [[PRED_COPY6]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -75,21 +75,21 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, 
[[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY9]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -101,53 +101,53 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY6]], %subreg.sub2, [[PRED_COPY7]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vreg_128 = PRED_COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 
$exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub0 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub2 + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE1]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY14]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY15]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY16]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY17]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY20]], [[COPY18]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY21]], [[COPY19]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY21:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE2]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY20]], [[PRED_COPY18]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY21]], [[PRED_COPY19]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY10]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY12]], %subreg.sub0, [[PRED_COPY13]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[PRED_COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -167,17 +167,17 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call 
void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -189,17 +189,17 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -211,17 +211,17 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 
[[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret void @@ -232,17 +232,17 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY4]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[PRED_COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void 
@llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -259,19 +259,19 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY5]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[PRED_COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll index bba5cebc0ca32..3fe180632b22f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -9,33 +9,33 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: 
[[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -46,44 +46,44 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: 
struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY10]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY10]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY11]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY11]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x half> %val @@ -100,57 +100,57 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__ ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY7]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY8]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + 
; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY7]], [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY8]], [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY13]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY13]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY10]], [[COPY15]], implicit $exec - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY9]], [[PRED_COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY10]], [[PRED_COPY15]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY16]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -161,35 +161,35 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 
= COPY $sgpr6 + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, 
addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) ret half %val @@ -201,47 +201,47 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: successors: %bb.2(0x80000000) ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; PACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; PACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.2: ; PACKED-NEXT: successors: %bb.3(0x80000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; PACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; PACKED-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; PACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; PACKED-NEXT: 
[[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; PACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; PACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; PACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; PACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; PACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; PACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; PACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.3: ; PACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; PACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; PACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; PACKED-NEXT: {{ $}} @@ -251,57 +251,57 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: bb.5: - ; PACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 - ; PACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 - ; PACKED-NEXT: $vgpr0 = COPY [[COPY17]] - ; PACKED-NEXT: $vgpr1 = COPY [[COPY18]] + ; PACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 + ; PACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY17]] + ; PACKED-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY18]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; UNPACKED-LABEL: name: struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: successors: %bb.2(0x80000000) ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; UNPACKED-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; UNPACKED-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; UNPACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.2: ; UNPACKED-NEXT: successors: %bb.3(0x80000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; UNPACKED-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; UNPACKED-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; UNPACKED-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; UNPACKED-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; 
UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.3: ; UNPACKED-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4) + ; UNPACKED-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 7) ; UNPACKED-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; UNPACKED-NEXT: {{ $}} @@ -311,28 +311,28 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: bb.5: - ; UNPACKED-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 - ; UNPACKED-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 - ; UNPACKED-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 - ; UNPACKED-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 + ; UNPACKED-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 + ; UNPACKED-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 + ; UNPACKED-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 + ; UNPACKED-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub3 ; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; UNPACKED-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY17]], [[COPY21]], implicit $exec - ; UNPACKED-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY18]], [[COPY22]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY17]], [[PRED_COPY21]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY22:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY18]], [[PRED_COPY22]], implicit $exec ; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; UNPACKED-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY23]], [[V_AND_B32_e64_1]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: 
[[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY23]], [[V_AND_B32_e64_1]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec - ; UNPACKED-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY19]], [[COPY24]], implicit $exec - ; UNPACKED-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY20]], [[COPY25]], implicit $exec - ; UNPACKED-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY26]], [[V_AND_B32_e64_3]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY19]], [[PRED_COPY24]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; UNPACKED-NEXT: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY20]], [[PRED_COPY25]], implicit $exec + ; UNPACKED-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]] + ; UNPACKED-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[PRED_COPY26]], [[V_AND_B32_e64_3]], implicit $exec ; UNPACKED-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec - ; UNPACKED-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] - ; UNPACKED-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[V_OR_B32_e64_]] + ; UNPACKED-NEXT: $vgpr1 = PRED_COPY [[V_OR_B32_e64_1]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x half> %val @@ -343,33 +343,33 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: bb.1 (%ir-block.0): ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; PACKED-NEXT: {{ $}} - ; PACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; PACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; PACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; PACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; PACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; PACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; PACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; PACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; PACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; PACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, 
[[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; PACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; PACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; PACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; PACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; PACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; PACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095 ; UNPACKED: bb.1 (%ir-block.0): ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; UNPACKED-NEXT: {{ $}} - ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4) - ; UNPACKED-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] + ; UNPACKED-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; UNPACKED-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; UNPACKED-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; UNPACKED-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; UNPACKED-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; UNPACKED-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; UNPACKED-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; UNPACKED-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; UNPACKED-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; UNPACKED-NEXT: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 7) + ; UNPACKED-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index 3f6a03426aeaf..004f34eb3fe7e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -8,17 +8,17 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -29,20 +29,20 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <2 x float> %val @@ -53,22 +53,22 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = 
COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <3 x float> %val @@ -79,24 +79,24 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY7]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY9]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY10]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; 
CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY9]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY10]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -107,18 +107,18 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY 
[[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) ret float %val @@ -130,47 +130,47 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]] ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY 
[[REG_SEQUENCE]].sub2 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr_32 = PRED_COPY [[REG_SEQUENCE]].sub3 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY9]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY10]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY11]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY12]], implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub0_sub1 + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE1]].sub2_sub3 + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY15]], [[PRED_COPY13]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[PRED_COPY16]], [[PRED_COPY14]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PRED_COPY6]], implicit $exec + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[PRED_COPY6]], implicit $exec ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY8]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = 
TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -180,14 +180,14 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY17]] - ; CHECK-NEXT: $vgpr1 = COPY [[COPY18]] - ; CHECK-NEXT: $vgpr2 = COPY [[COPY19]] - ; CHECK-NEXT: $vgpr3 = COPY [[COPY20]] + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub3 + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY17]] + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY18]] + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY19]] + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[PRED_COPY20]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) ret <4 x float> %val @@ -198,17 +198,17 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, 
[[PRED_COPY3]], %subreg.sub3 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[PRED_COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll index 3af3c59c0092a..5cb194a98b2fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,9 @@ -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -104,3 +104,6 @@ define amdgpu_kernel void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index fa246079e7d28..e0dbee5a28d52 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,11 @@ -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -198,3 +198,6 @@ attributes #1 = { nounwind } !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} + +!llvm.module.flags = !{!99} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll index 7701910551234..516cef0bfc0d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll @@ -688,7 +688,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: s_and_b64 exec, exec, s[0:1] ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -736,7 +735,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -782,7 +780,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX10-32-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_and_b32 s1, s0, vcc_lo @@ -828,7 +825,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX10-64-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -932,7 +928,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; SI-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1 @@ -998,7 +993,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX9-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; GFX9-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], -1 @@ -1061,7 +1055,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX10-32-NEXT: v_mov_b32_e32 v3, v2 ; GFX10-32-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; 
GFX10-32-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2 ; GFX10-32-NEXT: s_and_b32 s2, s0, vcc_lo ; GFX10-32-NEXT: s_xor_b32 s2, s2, -1 @@ -1125,7 +1118,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX10-64-NEXT: v_mov_b32_e32 v3, v2 ; GFX10-64-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; GFX10-64-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; GFX10-64-NEXT: s_xor_b64 s[4:5], s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll index 585af3f7d683f..97d1022e07723 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll @@ -6,9 +6,9 @@ define amdgpu_ps float @wqm_f32(float %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.wqm.f32(float %val) ret float %ret @@ -19,9 +19,9 @@ define amdgpu_ps float @wqm_v2f16(float %arg) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[WQM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[WQM:%[0-9]+]]:vgpr_32 = WQM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[WQM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.wqm.v2f16(<2 x half> %val) @@ -34,14 +34,14 @@ define amdgpu_ps <2 x float> @wqm_f64(double %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[WQM:%[0-9]+]]:vreg_64 = WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double 
@llvm.amdgcn.wqm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -61,17 +61,17 @@ define amdgpu_ps <3 x float> @wqm_v3f32(<3 x float> %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[WQM:%[0-9]+]]:vreg_96 = WQM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[WQM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[WQM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.wqm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll index f4ae6097bbc28..e64d969733bcc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll @@ -8,9 +8,9 @@ define amdgpu_ps float @wwm_f32(float %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.wwm.f32(float %val) ret float %ret @@ -21,9 +21,9 @@ define amdgpu_ps float @wwm_v2f16(float %arg) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half> %val) @@ -36,14 +36,14 @@ define amdgpu_ps <2 x float> @wwm_f64(double %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.wwm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -63,17 +63,17 @@ define amdgpu_ps <3 x float> @wwm_v3f32(<3 x float> %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float> %val) ret <3 x float> %ret @@ -84,9 +84,9 @@ define amdgpu_ps float @strict_wwm_f32(float %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.strict.wwm.f32(float %val) ret 
float %ret @@ -97,9 +97,9 @@ define amdgpu_ps float @strict_wwm_v2f16(float %arg) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[STRICT_WWM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[STRICT_WWM]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = bitcast float %arg to <2 x half> %ret = call <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half> %val) @@ -112,14 +112,14 @@ define amdgpu_ps <2 x float> @strict_wwm_f64(double %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: $vgpr0 = COPY [[COPY2]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY3]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY2]] + ; GCN-NEXT: $vgpr1 = PRED_COPY [[PRED_COPY3]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %ret = call double @llvm.amdgcn.strict.wwm.f64(double %val) %bitcast = bitcast double %ret to <2 x float> @@ -139,17 +139,17 @@ define amdgpu_ps <3 x float> @strict_wwm_v3f32(<3 x float> %val) { ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2 ; GCN-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2 - ; GCN-NEXT: $vgpr0 = COPY [[COPY3]] - ; GCN-NEXT: $vgpr1 = COPY [[COPY4]] - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]] + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[STRICT_WWM]].sub2 + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY3]] + ; GCN-NEXT: 
$vgpr1 = PRED_COPY [[PRED_COPY4]] + ; GCN-NEXT: $vgpr2 = PRED_COPY [[PRED_COPY5]] ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ret = call <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float> %val) ret <3 x float> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll index f1ff3825c5faf..555d5647e624c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Runs original SDAG test with -global-isel ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP -enable-var-scope %S/../trap.ll @@ -14,3 +15,13 @@ ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT -enable-var-scope %S/../trap.ll ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -enable-var-scope %S/../trap.ll +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GCN-WARNING: {{.*}} +; HSA-TRAP: {{.*}} +; MESA-TRAP: {{.*}} +; NO-HSA-TRAP: {{.*}} +; NO-MESA-TRAP: {{.*}} +; NO-TRAP-BIT: {{.*}} +; NOMESA-TRAP: {{.*}} +; TRAP-BIT: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index 1a11b19cdc98f..9e0502ea91c57 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -232,11 +232,12 @@ define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { ; GFX9-LABEL: sink_null_insert_pt: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[16:17] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s16, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-NEXT: v_writelane_b32 v40, s16, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -247,11 +248,12 @@ define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { ; GFX9-NEXT: s_swappc_b64 s[30:31], 0 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index 0e540fb86d2b1..ae428b86dbaed 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -76,8 +76,8 @@ bb.2: store volatile i32 0, i32 addrspace(1)* undef ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 -; DEFAULTSIZE: ; ScratchSize: 4112 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16 +; DEFAULTSIZE: ; ScratchSize: 16 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 ; ASSUME1024: ; ScratchSize: 1040 @@ -139,8 +139,8 @@ bb.1: ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160 -; DEFAULTSIZE: ; ScratchSize: 4160 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64 +; DEFAULTSIZE: ; ScratchSize: 64 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1088 ; ASSUME1024: ; ScratchSize: 1088 @@ -269,3 +269,6 @@ bb.1: declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone speculatable } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ASSUME1024: {{.*}} +; DEFAULTSIZE: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir index 4a7629651ea0c..6e046d6b10c78 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -42,8 +42,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[PRED_COPY]], [[TRUNC1]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -67,8 +67,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[PRED_COPY]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir index 9d195bc3d9e76..d64edbc51a74b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir @@ -31,8 +31,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_ADD %0, %1 @@ -50,8 +50,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir index 9526545467d55..497860aca8161 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -42,8 +42,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -63,8 +63,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir index a275498cb8ee0..30501b6f35a42 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp-compr.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[COPY2]](s32), [[COPY3]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 0, 0, [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr.v2f16), 0, 0, %0, %1, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir index 916f3f39e0e10..ef80d304f112f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir @@ -29,11 +29,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) 
= COPY $sgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY3]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index 484c1beebea85..ac7de2f7be867 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -58,7 +58,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -85,7 +85,7 @@ body: | ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir index 5c94e136ff161..b41bb57e29de0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[PRED_COPY]](s1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 @@ -35,8 +35,8 @@ body: | ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[PRED_COPY]](s1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64) %0:_(s32) = COPY $vgpr0 %1:_(s1) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir index ee0d18ac930d9..0dfcba8d23b18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[PRED_COPY]](s64), [[PRED_COPY1]](s32) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 @@ -35,8 +35,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s64), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[PRED_COPY]](s64), [[COPY1]](s32) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 @@ -54,8 +54,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY]](s64), [[PRED_COPY]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $sgpr0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir index 6667a3dd58d04..e0e3140ff5555 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.cvt.pkrtz.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -33,8 +33,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir index 16a77d4341166..f15f6c50d8d55 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir @@ -19,11 +19,11 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -48,12 +48,12 @@ body: | ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[ICMP]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -78,11 +78,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY5]](s32), [[COPY6]](s32), [[ICMP]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[ICMP]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -107,8 +107,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[ICMP]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir index 6b1ad9079b25e..e79ae4a4d2640 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.scale.mir @@ -14,9 +14,9 @@ body: | ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[PRED_COPY]](s32), [[COPY1]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[PRED_COPY]](s32), 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir index 3be5ab9097f93..0bb60ad8bfafa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir @@ -15,9 +15,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir index 4254cf2296493..9621455d3cc04 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir @@ -15,8 +15,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 @@ -35,9 +35,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir index c198d6f77d1a9..cfdb3fb66dfde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 @@ -33,9 +33,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: 
[[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir index 9f352b09650dc..338affef5eaaf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 @@ -33,9 +33,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY]](s32), 0, 0, 0, 0, 0, 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir index 1c82bfe77a1e0..11dbdfa8d3814 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir @@ -15,9 +15,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, 
%1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir index 10272e80280b5..eacefe68754a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir @@ -14,8 +14,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[PRED_COPY]](s32), 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir index 1add9772e50c5..187f9e73a8a01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fcmp.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[PRED_COPY]](s32), [[COPY1]](s32), 1 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[PRED_COPY]](s32), 1 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir index 
eafd01001fcef..1ddbafcaf5c0e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.fmul.legacy.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[PRED_COPY]](s32), [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 @@ -33,8 +33,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir index 127cf59653151..7eb56d2537e04 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.icmp.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 32 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[PRED_COPY]](s32), [[COPY1]](s32), 32 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[PRED_COPY]](s32), 32 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 3e4b0d4fa1e1d..a8129a0bfcf3c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -8,39 +8,39 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY 
[[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -53,41 +53,41 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -101,16 +101,16 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: 
[[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -144,7 +144,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -154,24 +154,24 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = 
COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -205,7 +205,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -215,8 +215,8 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -230,18 +230,18 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; FAST-NEXT: {{ $}} @@ -274,7 +274,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -284,26 +284,26 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - 
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY8]](s32) ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; GREEDY-NEXT: {{ $}} @@ -336,7 +336,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[PRED_COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -346,8 +346,8 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[PRED_COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, 
i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 8f62f75b97c6f..02b9836074aea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -8,46 +8,46 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -61,48 +61,48 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: bb.1 (%ir-block.0): ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr14 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY 
[[PRED_COPY12]](s32) + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr10 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr11 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr12 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr13 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr14 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from 
custom "ImageResource") + ; GREEDY-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY12]](s32) + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) @@ -117,21 +117,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -165,7 +165,7 @@ 
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -182,21 +182,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), 
[[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -230,7 +230,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -254,21 +254,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: 
[[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -294,7 +294,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -311,21 +311,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 
[[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -351,7 +351,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -375,21 +375,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; FAST-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 - ; FAST-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; FAST-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; FAST-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY 
$vgpr5 + ; FAST-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; FAST-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; FAST-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 + ; FAST-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr9 + ; FAST-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr10 + ; FAST-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr11 + ; FAST-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; FAST-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr12 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; FAST-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -435,7 +435,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: bb.3: ; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; FAST-NEXT: {{ $}} @@ -452,21 +452,21 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 - ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY 
$vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr7 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32), [[PRED_COPY4]](s32), [[PRED_COPY5]](s32), [[PRED_COPY6]](s32), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr8 + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr9 + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr10 + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr11 + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY8]](s32), [[PRED_COPY9]](s32), [[PRED_COPY10]](s32), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr12 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -512,7 +512,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[PRED_COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir index c16de1739ccfc..120ec96647f6c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.mov.mir @@ -15,8 +15,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[COPY2]](s32), 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), [[PRED_COPY]](s32), 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.mov), %0, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir index 
d80f22ce70c98..7322402d03ac5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.f16.mir @@ -16,8 +16,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[PRED_COPY]](s32), 1, 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 @@ -36,9 +36,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[COPY2]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), [[PRED_COPY]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1.f16), %0, 1, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir index 6575350b2ad8a..dcf5c4c5f63ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p1.mir @@ -15,8 +15,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[PRED_COPY]](s32), 1, 1, [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[COPY2]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p1), [[PRED_COPY]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.interp.p1), %0, 1, 1, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir index 80ea90e73eb09..176cb14d68f01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.f16.mir @@ -17,9 +17,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, 1, [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -40,10 +40,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[COPY3]](s32), [[COPY4]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2.f16), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir index 19b45deb5e3ec..9e133e24ed4a0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.interp.p2.mir @@ -16,9 +16,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -39,10 +39,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[COPY3]](s32), [[COPY4]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.interp.p2), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), 1, 1, [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir index d419a21d0d967..116a2c89ec0fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.kill.mir @@ -16,8 +16,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY2]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -52,8 +52,8 @@ body: | bb.0: ; CHECK-LABEL: name: kill_constant_true ; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[C]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 true G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 ... @@ -66,8 +66,8 @@ body: | bb.0: ; CHECK-LABEL: name: kill_constant_false ; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[C]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 false G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir index 214d039c91d9e..277836bbc42d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir @@ -39,10 +39,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -88,10 +88,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -137,10 +137,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 
x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -186,10 +186,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -235,10 +235,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -284,10 +284,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -333,10 +333,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -382,10 +382,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -431,10 +431,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -480,10 +480,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<4 x s16>) = PRED_COPY [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[PRED_COPY]](<4 x s16>), [[PRED_COPY1]](<4 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 @@ -529,10 +529,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -578,10 +578,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -627,10 +627,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -676,10 +676,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -725,10 +725,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(s32) = COPY $sgpr32 %1:_(s32) = COPY $sgpr33 @@ -774,10 +774,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<32 x s32>) = PRED_COPY [[COPY2]](<32 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<32 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -823,10 +823,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -872,10 +872,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -921,10 +921,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<16 x s32>) = PRED_COPY [[COPY2]](<16 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<16 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 @@ -970,10 +970,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:agpr(<4 x s32>) = PRED_COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>), [[PRED_COPY2]](<4 x s32>), 0, 0, 0 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) %0:_(<2 x s16>) = COPY $sgpr32 %1:_(<2 x s16>) = COPY $sgpr33 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index 5fca16e473a4e..9c90af8712312 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -8,17 +8,17 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 
+ ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -30,18 +30,18 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY7]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), 
align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -54,15 +54,15 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -87,7 +87,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -97,7 +97,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -110,15 +110,15 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -126,15 +126,15 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY5]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -144,7 +144,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -157,15 +157,15 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: successors: 
%bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -184,8 +184,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY5]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY5]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -193,7 +193,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY6]](s32), [[PRED_COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -203,7 +203,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; 
CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir index 70acd1e3f99e2..b08038384fb2b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir index 27b75f99059cb..3562872cb352e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -70,9 +70,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -90,10 +90,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; 
CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -113,8 +113,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -132,10 +132,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[PRED_COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 @@ -153,8 +153,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY]](s32), implicit $exec ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $agpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index b941a250004eb..15d1addf2fa86 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -8,31 +8,31 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg 
%soffse ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GREEDY-LABEL: name: s_buffer_load_i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), 
[[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val @@ -43,39 +43,39 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val @@ -86,45 +86,45 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; 
CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: 
[[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val @@ -135,75 +135,75 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit 
$sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; 
GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val @@ -214,123 +214,123 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: 
[[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; CHECK-NEXT: $sgpr8 = COPY [[INT8]](s32) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; CHECK-NEXT: $sgpr9 = COPY [[INT9]](s32) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; CHECK-NEXT: $sgpr10 = COPY [[INT10]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; CHECK-NEXT: $sgpr11 = COPY [[INT11]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; CHECK-NEXT: $sgpr12 = COPY [[INT12]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[INT13]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[INT14]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; CHECK-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY13]](s32) + ; CHECK-NEXT: $sgpr8 = PRED_COPY [[INT8]](s32) + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY14]](s32) + ; CHECK-NEXT: $sgpr9 = PRED_COPY [[INT9]](s32) + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY15]](s32) + ; CHECK-NEXT: $sgpr10 = PRED_COPY [[INT10]](s32) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY16]](s32) + ; CHECK-NEXT: $sgpr11 = PRED_COPY [[INT11]](s32) + ; CHECK-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY17]](s32) + ; CHECK-NEXT: $sgpr12 = PRED_COPY [[INT12]](s32) + ; CHECK-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY18]](s32) + ; CHECK-NEXT: $sgpr13 = PRED_COPY [[INT13]](s32) + ; CHECK-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY19]](s32) + ; CHECK-NEXT: $sgpr14 = PRED_COPY [[INT14]](s32) + ; CHECK-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV15]](s32) + ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY20]](s32) + ; CHECK-NEXT: $sgpr15 = PRED_COPY [[INT15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), 
[[COPY5]](s32) - ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) - ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GREEDY-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GREEDY-NEXT: $sgpr8 = COPY [[INT8]](s32) - ; GREEDY-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GREEDY-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GREEDY-NEXT: $sgpr9 = COPY [[INT9]](s32) - ; GREEDY-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GREEDY-NEXT: $sgpr10 = COPY [[INT10]](s32) - ; GREEDY-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GREEDY-NEXT: $sgpr11 = COPY [[INT11]](s32) - ; GREEDY-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; GREEDY-NEXT: $sgpr12 = COPY [[INT12]](s32) - ; GREEDY-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GREEDY-NEXT: $sgpr13 = COPY [[INT13]](s32) - ; GREEDY-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; GREEDY-NEXT: $sgpr14 = COPY [[INT14]](s32) - ; GREEDY-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GREEDY-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = PRED_COPY [[INT]](s32) + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = PRED_COPY [[INT1]](s32) + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = PRED_COPY [[INT2]](s32) + ; GREEDY-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = PRED_COPY [[INT3]](s32) + ; GREEDY-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = PRED_COPY [[INT4]](s32) + ; GREEDY-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = PRED_COPY [[INT5]](s32) + ; GREEDY-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = PRED_COPY [[INT6]](s32) + ; GREEDY-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = PRED_COPY [[INT7]](s32) + ; GREEDY-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY13]](s32) + ; GREEDY-NEXT: $sgpr8 = PRED_COPY [[INT8]](s32) + ; GREEDY-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY14]](s32) + ; GREEDY-NEXT: $sgpr9 = PRED_COPY [[INT9]](s32) + ; GREEDY-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY15]](s32) + ; GREEDY-NEXT: $sgpr10 = PRED_COPY [[INT10]](s32) + ; GREEDY-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY16]](s32) + ; GREEDY-NEXT: $sgpr11 = PRED_COPY [[INT11]](s32) + ; GREEDY-NEXT: [[PRED_COPY17:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY17]](s32) + ; GREEDY-NEXT: $sgpr12 = PRED_COPY [[INT12]](s32) + ; GREEDY-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY18]](s32) + ; GREEDY-NEXT: $sgpr13 = PRED_COPY [[INT13]](s32) + ; GREEDY-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY19]](s32) + ; GREEDY-NEXT: $sgpr14 = PRED_COPY [[INT14]](s32) + ; 
GREEDY-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV15]](s32) + ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PRED_COPY20]](s32) + ; GREEDY-NEXT: $sgpr15 = PRED_COPY [[INT15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val @@ -342,31 +342,31 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -377,35 +377,35 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: 
[[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val @@ -416,37 +416,37 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; 
CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val @@ -457,39 +457,39 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val @@ -500,51 +500,51 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - 
; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -555,71 +555,71 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ 
$}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; 
CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), 
[[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -630,16 +630,16 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -647,16 +647,16 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 
; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 @@ -671,17 +671,17 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) @@ -693,17 +693,17 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - 
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) @@ -722,19 +722,19 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) @@ -752,19 +752,19 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant 
load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) @@ -789,17 +789,17 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; 
CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) @@ -811,17 +811,17 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x 
s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) @@ -840,19 +840,19 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<32 x s16>) ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) @@ -870,19 +870,19 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), 
[[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) @@ -907,17 +907,17 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) @@ -929,17 +929,17 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY 
$sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) @@ -958,19 +958,19 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) @@ -988,19 +988,19 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 
x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) @@ -1025,17 +1025,17 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) @@ -1047,17 +1047,17 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) ; GREEDY-NEXT: G_STORE 
[[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) @@ -1076,19 +1076,19 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 
x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) @@ -1106,19 +1106,19 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) @@ -1142,37 +1142,37 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1184,37 +1184,37 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} 
- ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1226,35 +1226,35 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD 
[[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1267,57 +1267,57 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY 
$sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1330,55 +1330,55 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit 
$vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) ; GREEDY-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1390,77 +1390,77 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; 
GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: 
$vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1472,75 +1472,75 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = PRED_COPY 
[[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1553,13 +1553,13 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1584,7 +1584,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1592,19 +1592,19 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1629,7 +1629,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), 
[[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1637,7 +1637,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1649,14 +1649,14 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1681,7 +1681,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1689,20 +1689,20 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: 
name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1727,7 +1727,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1735,7 +1735,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1748,15 +1748,15 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = 
PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1781,7 +1781,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1789,21 +1789,21 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = 
IMPLICIT_DEF @@ -1828,7 +1828,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -1836,7 +1836,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1849,11 +1849,11 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -1888,17 +1888,17 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; 
GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -1933,7 +1933,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val @@ -1945,13 +1945,13 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -1976,7 +1976,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -1984,19 +1984,19 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2021,7 +2021,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2029,7 +2029,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val @@ -2042,14 +2042,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 4064 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2074,8 +2074,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2085,27 +2085,27 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; 
GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2130,8 +2130,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[PRED_COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2141,14 +2141,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2162,15 +2162,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2195,8 +2195,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2206,28 +2206,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), 
[[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2252,8 +2252,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), 
[[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2263,14 +2263,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2282,15 +2282,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2315,8 +2315,8 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2326,28 +2326,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY4]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF @@ -2372,8 +2372,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[PRED_COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2383,14 +2383,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ 
-2402,15 +2402,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2434,8 +2434,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2445,28 +2445,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: 
$vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2490,8 +2490,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; 
GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2501,14 +2501,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2520,15 +2520,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2552,8 +2552,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2563,28 +2563,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: 
[[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2608,8 +2608,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2619,14 +2619,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2638,15 +2638,15 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2670,8 +2670,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -2681,28 +2681,28 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY 
[[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY5]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GREEDY-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec @@ -2726,8 +2726,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: bb.3: ; GREEDY-NEXT: successors: %bb.4, %bb.2 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GREEDY-NEXT: {{ $}} @@ -2737,14 +2737,14 @@ define amdgpu_ps <8 x float> 
@s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -2756,11 +2756,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -2798,24 +2798,24 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr5 = COPY 
[[UV13]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -2853,14 +2853,14 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY-NEXT: $vgpr0 = COPY [[UV8]](s32) - ; GREEDY-NEXT: $vgpr1 = COPY [[UV9]](s32) - ; GREEDY-NEXT: $vgpr2 = COPY [[UV10]](s32) - ; GREEDY-NEXT: $vgpr3 = COPY [[UV11]](s32) - ; GREEDY-NEXT: $vgpr4 = COPY [[UV12]](s32) - ; GREEDY-NEXT: $vgpr5 = COPY [[UV13]](s32) - ; GREEDY-NEXT: $vgpr6 = COPY [[UV14]](s32) - ; GREEDY-NEXT: $vgpr7 = COPY [[UV15]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr1 = PRED_COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr2 = PRED_COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr3 = PRED_COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr4 = PRED_COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr5 = PRED_COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr6 = PRED_COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr7 = PRED_COPY [[UV15]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val @@ -2871,35 +2871,35 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; CHECK: bb.1 
(%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; 
GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2911,35 +2911,35 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[PRED_COPY4]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2951,43 +2951,43 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: 
[[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 @@ -3000,43 +3000,43 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: 
[[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 @@ -3050,39 +3050,39 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY5]], [[C]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[PRED_COPY5]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY6]], [[PRED_COPY4]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[PRED_COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v @@ -3095,41 +3095,41 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: 
[[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; GREEDY-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; GREEDY-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY4]], [[PRED_COPY6]] + ; GREEDY-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[PRED_COPY7]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[PRED_COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = 
PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index 45a2ab5b774c2..52c25df5ce727 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -21,12 +21,12 @@ body: | ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 256 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY1]], [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[PRED_COPY1]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; FAST-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 @@ -34,11 +34,11 @@ body: | ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 256 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PRED_COPY]], [[C]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[PRED_COPY]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY-NEXT: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 @@ -63,8 +63,8 @@ body: | ; FAST-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; 
FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) @@ -75,8 +75,8 @@ body: | ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60 - ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 49e352858f225..d470ebc18abcb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -8,16 +8,16 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret 
float %val @@ -29,18 +29,18 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4) - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY7]](s32), [[PRED_COPY8]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -53,14 +53,14 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x 
s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -85,7 +85,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[PRED_COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -95,7 +95,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -108,14 +108,14 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -123,15 +123,15 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI 
[[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY6]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -141,7 +141,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val @@ -154,14 +154,14 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr6 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ 
$}} @@ -180,8 +180,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY6]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY6]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -189,7 +189,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY4]](s32), [[PRED_COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -199,7 +199,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: $vgpr0 = PRED_COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %val diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 94cad1624b562..69be5f2b751dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -8,16 +8,16 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY 
$vgpr1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -29,19 +29,19 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr6 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr7 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr8 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr9 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[PRED_COPY6]](s32) + ; CHECK-NEXT: 
G_AMDGPU_BUFFER_STORE [[PRED_COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY9]](s32), [[PRED_COPY10]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -54,15 +54,15 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -87,7 +87,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[PRED_COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -109,15 +109,15 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; 
CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr2 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr(s32) = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -125,15 +125,15 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY7]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} @@ -155,15 +155,15 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr0 + ; 
CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr1 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr2 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32), [[PRED_COPY2]](s32), [[PRED_COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr4 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr5 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY $vgpr6 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32(s32) = PRED_COPY $vgpr7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} @@ -182,8 +182,8 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY7]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[PRED_COPY7]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec @@ -191,7 +191,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE [[PRED_COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[PRED_COPY5]](s32), [[PRED_COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 7) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir index 46184e8b4192a..761cf4cff3296 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.update.dpp.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[PRED_COPY]](p3), [[PRED_COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -36,8 +36,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[PRED_COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 @@ -57,8 +57,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[PRED_COPY]](s32), 0, 0, 0, 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir index babec48ab6ce2..1df7cf692b9b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.demote.mir @@ -16,8 +16,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY2]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -52,8 +52,8 @@ body: | bb.0: ; CHECK-LABEL: name: wqm_demote_constant_true ; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[C]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 true G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 ... 
@@ -66,8 +66,8 @@ body: | bb.0: ; CHECK-LABEL: name: wqm_demote_constant_false ; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[C]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[PRED_COPY]](s1) %0:_(s1) = G_CONSTANT i1 false G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir index c4238459bd04c..16d21924af761 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm), %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir index 5ae3d26893660..e081c713f6fb8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wqm.vote.mir @@ -16,8 +16,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(ne), %0, %1 @@ -56,8 +56,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[PRED_COPY]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir index d319102845f29..6597093e4c342 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.writelane.mir @@ -15,8 +15,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir index 81469a9fb0cef..35d7977593997 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.wwm.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir index 145681d0591a0..15e701f8d6c94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -24,8 +24,8 @@ body: | ; CHECK-LABEL: name: amdgpu_wave_address_v ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[COPY]](p1) :: (store (p5), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[PRED_COPY]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %1, %0 :: (store (p5), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir index 3d03086dc4d45..b5a66007ab122 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir @@ -94,10 +94,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -173,10 +173,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[PRED_COPY1]], [[ICMP1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 0 @@ -198,10 +198,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir index 1bf143554a6af..090f6f80b356a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_AND %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_AND %0, %1 @@ -256,8 +256,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - 
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -285,8 +285,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -315,10 +315,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] @@ -349,10 +349,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) 
= G_AND [[UV]], [[UV2]] @@ -608,8 +608,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[PRED_COPY]], [[COPY1]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_AND %0, %1 @@ -627,8 +627,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir index 83db525eec756..4a030c24d3854 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir @@ -29,9 +29,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[DEF]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_ANYEXT %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir index c304dc22ec495..7bcb60f72cc96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ashr.mir @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -231,8 +231,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -252,8 +252,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: 
[[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir index 40b888bb59fa3..2f65edc458461 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir @@ -15,10 +15,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[PRED_COPY]](p1), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 @@ -38,10 +38,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[PRED_COPY]](p0), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 @@ -61,10 +61,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG 
[[PRED_COPY]](p3), [[PRED_COPY1]], [[PRED_COPY2]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir index b83950a353ccb..12817cae0a5fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir index 35990196c01b4..7a1e704a697f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir index 682d423387620..898b1a4e7322b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir index 1c21ef14f6adf..5d78adabd82f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir index dad813d633707..d67d976d68a8d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = 
G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir index dc94745944e86..1a1f01ba17107 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir index 1fe11cea88066..543555ffc9404 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ 
$}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir index 69196490b5f38..527ca31f9b633 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), 
addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir index 11bc4316a8276..6af95b932ff6f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir index dd7a52f62718e..c04c6d5016865 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir index bcc724313df95..2e169a17b4d73 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p1), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), 
addrspace 1) @@ -34,9 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p0) = PRED_COPY [[COPY]](p0) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p0), [[PRED_COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -54,9 +54,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[PRED_COPY]](p3), [[PRED_COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir index f26371219f138..4a5627d8c3e23 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir @@ -92,8 +92,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: @@ -125,8 +125,8 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -158,8 +158,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -191,8 +191,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND 
[[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: G_BRCOND [[PRED_COPY]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir index 212dad017cba0..37c4cddf08b16 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bswap.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:vgpr(s32) = G_BSWAP [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_BSWAP %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir index 018fa56a6f105..13c85a7de51c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 @@ -112,8 +112,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $agpr0 @@ -135,8 +135,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $vgpr0 @@ -158,9 +158,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 @@ -182,9 +182,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir index 90e32744d22b9..d57c79fc2a0de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[COPY1]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[PRED_COPY]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -108,8 +108,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY 
$agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $agpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -129,8 +129,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $agpr0 @@ -150,9 +150,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $agpr0 %1:_(<2 x s16>) = COPY $sgpr0 @@ -172,9 +172,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(<2 x s16>) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[PRED_COPY]](<2 x s16>), [[PRED_COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $agpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir index baeed68aacdb9..194454b94fefb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir @@ -13,8 +13,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32)) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: G_STORE [[PRED_COPY]](s32), [[COPY]](p1) :: (store (s32)) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 G_STORE %1, %0 :: (store (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir index f76ed6dee3c55..e6d4976380f49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-dyn-stackalloc.mir @@ -22,8 +22,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align1 ; WAVE32: liveins: $sgpr0 @@ -31,8 +31,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 1 @@ -56,8 +56,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align2 ; WAVE32: liveins: $sgpr0 @@ -65,8 +65,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 2 @@ -90,8 +90,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], 
[[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align4 ; WAVE32: liveins: $sgpr0 @@ -99,8 +99,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 4 @@ -124,8 +124,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align8 ; WAVE32: liveins: $sgpr0 @@ -133,8 +133,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 8 @@ -158,8 +158,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_align16 ; WAVE32: liveins: $sgpr0 @@ -167,8 +167,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = COPY $sgpr0 %1:_(p5) = G_DYN_STACKALLOC %0, 16 @@ -192,8 +192,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -203,8 +203,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -230,8 +230,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -241,8 +241,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -268,8 +268,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -8192 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -279,8 +279,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: 
[[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY]], [[C]](s32) - ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C1]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -303,15 +303,15 @@ body: | ; WAVE64: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align4 ; WAVE32: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 4 @@ -335,8 +335,8 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align8 ; WAVE32: liveins: $sgpr0 @@ -344,8 +344,8 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 8 @@ -369,8 +369,8 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: 
S_ENDPGM 0, implicit [[PTR_ADD]](p5) ; WAVE32-LABEL: name: test_dyn_stackalloc_sgpr_constant_align16 ; WAVE32: liveins: $sgpr0 @@ -378,8 +378,8 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTR_ADD]](p5) %0:_(s32) = G_CONSTANT i32 32 %1:_(p5) = G_DYN_STACKALLOC %0, 16 @@ -403,8 +403,8 @@ body: | ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048 ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) @@ -414,8 +414,8 @@ body: | ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 32 ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C]], [[C1]](s32) - ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sp_reg - ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY]], [[SHL]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr(p5) = PRED_COPY $sp_reg + ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PRED_COPY]], [[SHL]](s32) ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1024 ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32) ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir index 544c4d6ac4275..307271d8b6437 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -94,8 +94,8 @@ body: | ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -147,8 +147,8 @@ body: | ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) 
= COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -244,8 +244,8 @@ body: | ; WAVE64-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} @@ -297,8 +297,8 @@ body: | ; WAVE32-NEXT: [[C14:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -418,9 +418,9 @@ body: | ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 @@ -456,9 +456,9 @@ body: | ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: 
[[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 @@ -509,9 +509,9 @@ body: | ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 @@ -547,9 +547,9 @@ body: | ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -572,8 +572,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -620,16 +620,16 @@ body: | ; WAVE64-NEXT: 
[[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -676,8 +676,8 @@ body: | ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 1 @@ -701,8 +701,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), 
[[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -749,16 +749,16 @@ body: | ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_addm1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -805,8 +805,8 @@ body: | ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 -1 @@ -830,8 +830,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), 
[[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -878,16 +878,16 @@ body: | ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_vv_idx_add16 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -934,8 +934,8 @@ body: | ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_CONSTANT i32 16 @@ -959,8 +959,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
[[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -990,9 +990,9 @@ body: | ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_vv_idx_add1 ; WAVE32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 @@ -1000,8 +1000,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1031,9 +1031,9 @@ body: | ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) 
- ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 @@ -1058,8 +1058,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1106,16 +1106,16 @@ body: | ; WAVE64-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE64-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE64-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE64-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE64-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) ; WAVE32-LABEL: name: extract_vector_elt_v16s32_sv_idx_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; 
WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1162,8 +1162,8 @@ body: | ; WAVE32-NEXT: [[C15:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15 ; WAVE32-NEXT: [[ICMP14:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C15]] ; WAVE32-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP14]](s1), [[UV15]], [[SELECT13]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT14]](s32) - ; WAVE32-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT14]](s32) + ; WAVE32-NEXT: $vgpr0 = COPY [[PRED_COPY1]](s32) %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_CONSTANT i32 1 @@ -1187,8 +1187,8 @@ body: | ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE64-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE64-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE64-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE64-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1218,9 +1218,9 @@ body: | ; WAVE64-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE64-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE64-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE64-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE64-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; WAVE32-LABEL: name: extract_vector_elt_v8s64_sv_add1 ; WAVE32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 @@ -1228,8 +1228,8 @@ body: | ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] + ; WAVE32-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; WAVE32-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[PRED_COPY]] ; WAVE32-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; WAVE32-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C1]] @@ -1259,9 +1259,9 @@ body: | ; WAVE32-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C7]] ; WAVE32-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV14]], [[SELECT10]] ; WAVE32-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[UV15]], [[SELECT11]] - ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SELECT12]](s32) - ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT13]](s32) - ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; WAVE32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT12]](s32) + ; WAVE32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT13]](s32) + ; WAVE32-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[PRED_COPY2]](s32) ; WAVE32-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir index 2516beca15cef..8f62ef51a5390 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FADD %0, %1 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FADD %0, %1 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[COPY2]] + ; 
CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir index bee37dfd8d8cc..1eb018cb0fda3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcanonicalize.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FCANONICALIZE %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir index 623d012a8f300..90d29bb18a622 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fceil.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:vgpr(s32) = G_FCEIL [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FCEIL %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir index 3cedf732fc083..cf162aa9dd809 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fcmp.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[PRED_COPY]](s32), [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_FCMP floatpred(uge), %0(s32), %1 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY2]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[PRED_COPY]](s32), [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_FCMP floatpred(uge), %0, %1 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(uge), [[COPY1]](s32), [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_FCMP floatpred(uge), %1, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir index 73fa491269b2e..f7511bb92ea73 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fexp2.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FEXP2_:%[0-9]+]]:vgpr(s32) = G_FEXP2 [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FEXP2 %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir index 054c835b99b6d..b9702d8402996 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-flog2.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FLOG2_:%[0-9]+]]:vgpr(s32) = G_FLOG2 [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FLOG2 %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir index 939b4b2792910..7fd39acb6b932 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fma.mir @@ -15,10 +15,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -37,9 +37,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -58,9 +58,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -79,9 +79,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
$vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[PRED_COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -100,8 +100,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 @@ -120,8 +120,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[PRED_COPY]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -140,8 +140,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[PRED_COPY]], [[COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir index f35b66dab1f73..0b3497a8c8376 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fmul.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FMUL %0, %1 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY1]] + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FMUL %0, %1 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FMUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir index 5ad64081f66a7..506cfd1f2563b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fpext.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:vgpr(s64) = G_FPEXT [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s64) = G_FPEXT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir index d82e215cb3761..6f2f5a8007360 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:vgpr(s32) = G_FPTOSI [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FPTOSI %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir index de26d470c3112..c6ccf95f9ae16 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FPTOUI %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir index 7f6a5bde9c388..5d958bac4af3c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptrunc.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:vgpr(s32) = G_FPTRUNC [[PRED_COPY]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = G_FPTRUNC %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir index 724b66ae2c76e..2b8da09873541 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FRINT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir index 1c04833b84590..f68c4c78a928b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir @@ -15,10 +15,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -37,9 +37,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[PRED_COPY]], [[PRED_COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -58,9 +58,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[COPY1]], [[PRED_COPY1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -79,9 +79,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[PRED_COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -100,8 +100,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 @@ -120,8 +120,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[PRED_COPY]], [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -140,8 +140,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[PRED_COPY]], [[COPY1]], [[COPY2]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir index 1af02b5636e56..77349878c13bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[FSQRT]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_FSQRT %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir index d166d6e5f07f4..8b5c22f2d59bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_FSUB %0, %1 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FSUB %0, %1 @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[FSUB:%[0-9]+]]:vgpr(s32) = G_FSUB [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_FSUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir index 092cc0fbda099..1d3410fe33992 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.mir @@ -42,15 +42,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[COPY1]] ; GFX8-LABEL: name: icmp_eq_s32_sv ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), 
[[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -68,15 +68,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_eq_s32_vs ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s1) = G_ICMP intpred(eq), %1, %0 @@ -118,9 +118,9 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_eq_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -145,15 +145,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_eq_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 @@ -171,15 +171,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_eq_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(eq), %1, %0 @@ -221,9 +221,9 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_ne_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -248,15 +248,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_ne_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(ne), %0, %1 @@ -274,15 +274,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_ne_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = 
PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(ne), %1, %0 @@ -324,17 +324,17 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[PRED_COPY1]] ; GFX8-LABEL: name: icmp_slt_s64_ss ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[PRED_COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $sgpr2_sgpr3 %2:_(s1) = G_ICMP intpred(slt), %0, %1 @@ -352,15 +352,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[COPY1]] ; GFX8-LABEL: name: icmp_slt_s64_sv ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY2]](s64), [[COPY1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[PRED_COPY]](s64), [[COPY1]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(slt), %0, %1 @@ -378,15 +378,15 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[PRED_COPY]] ; GFX8-LABEL: name: icmp_slt_s64_vs ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; GFX8-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[COPY2]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[PRED_COPY]] %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s1) = G_ICMP intpred(slt), %1, %0 @@ -430,18 +430,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: map_icmp_already_vcc_bank_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -464,18 +464,18 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX7-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX7-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) ; GFX8-LABEL: name: map_icmp_already_vcc_regclass_sgpr_inputs ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[PRED_COPY1]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir index ab845ac2a4463..e967c9e4da58b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-icmp.s16.mir @@ -16,9 +16,9 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s16), [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s16) = G_TRUNC %0 @@ -40,8 +40,8 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s16), [[TRUNC1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 @@ -63,8 +63,8 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s16), [[TRUNC1]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s16), [[TRUNC1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir index 958ce444726e5..9e3f6609c87eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -52,24 +52,24 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) 
= G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY3]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY4]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -93,20 +93,20 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C1]] ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP 
intpred(eq), [[PRED_COPY1]](s32), [[C2]] ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY4]](s32), [[C3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY1]](s32), [[C3]] ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) @@ -132,24 +132,24 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY6]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY3]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY7]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY4]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -174,8 +174,8 @@ body: | 
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<4 x s32>) = PRED_COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] @@ -215,20 +215,20 @@ body: | ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY4]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY5]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY2]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY6]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY3]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -253,19 +253,19 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[UV]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C1]] ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY1]], [[UV1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C2]] ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY1]], [[UV2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C3]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[PRED_COPY]](s32), [[C3]] ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY1]], [[UV3]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) @@ -383,9 +383,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]] @@ -415,57 +415,57 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), 
[[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY5]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY2]], [[UV3]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY7]], [[UV5]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY3]], [[UV4]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY4]], [[UV5]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY9]], [[UV7]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY5]], [[UV6]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY6]], [[UV7]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV8]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY11]], [[UV9]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = 
PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY7]], [[UV8]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY8]], [[UV9]] ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV10]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY13]], [[UV11]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY9]], [[UV10]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY10]], [[UV11]] ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV12]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY15]], [[UV13]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY11]], [[UV12]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY12]], [[UV13]] ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV14]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY17]], [[UV15]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY13]], [[UV14]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY14]], [[UV15]] ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV16]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY19]], [[UV17]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY15]], [[UV16]] + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY16]], [[UV17]] ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) @@ -491,9 +491,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s64>) = PRED_COPY [[COPY]](<8 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<8 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] @@ -586,52 +586,52 @@ body: | ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32), [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[UV2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[UV2]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) 
= PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV3]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY6]], [[UV5]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV4]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY3]], [[UV5]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY7]], [[UV6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[COPY8]], [[UV7]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY4]], [[UV6]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP2]](s1), [[PRED_COPY5]], [[UV7]] ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY9]], [[UV8]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[COPY10]], [[UV9]] + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY6]], [[UV8]] + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP3]](s1), [[PRED_COPY7]], [[UV9]] ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C4]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY11]], [[UV10]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[COPY12]], [[UV11]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY8]], [[UV10]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP4]](s1), [[PRED_COPY9]], [[UV11]] ; CHECK-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5 ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[ICMP5]](s1), [[COPY13]], [[UV12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[COPY14]], [[UV13]] + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY10]], [[UV12]] + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP5]](s1), [[PRED_COPY11]], [[UV13]] ; CHECK-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY15]], [[UV14]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[COPY16]], [[UV15]] + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY12]], [[UV14]] + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP6]](s1), [[PRED_COPY13]], [[UV15]] ; CHECK-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C7]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY17]], [[UV16]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[COPY18]], [[UV17]] + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY14]], [[UV16]] + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV1]](s32) + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP7]](s1), [[PRED_COPY15]], [[UV17]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32), [[SELECT2]](s32), [[SELECT3]](s32), [[SELECT4]](s32), [[SELECT5]](s32), [[SELECT6]](s32), [[SELECT7]](s32), [[SELECT8]](s32), [[SELECT9]](s32), [[SELECT10]](s32), [[SELECT11]](s32), [[SELECT12]](s32), [[SELECT13]](s32), [[SELECT14]](s32), [[SELECT15]](s32) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[BUILD_VECTOR]](<16 x s32>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) @@ -787,16 +787,16 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s32>) = PRED_COPY [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<2 x s32>) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[UV]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY1]], [[UV]] ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[COPY5]], [[UV1]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[PRED_COPY2]], [[UV1]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir index 609065a5b000e..60ec8a3d1dff6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[COPY1]](s32), 0 %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -50,8 +50,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[PRED_COPY]](s32), 0 %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -122,8 +122,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY1]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[COPY1]](s32), 0 %0:_(s64) = COPY $agpr0_agpr1 %1:_(s32) = COPY $vgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -140,8 +140,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY]], [[COPY2]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = 
G_INSERT [[COPY]], [[PRED_COPY]](s32), 0 %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $agpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -159,9 +159,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s64) = COPY $agpr0_agpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[PRED_COPY1]](s32), 0 %0:_(s64) = COPY $agpr0_agpr1 %1:_(s32) = COPY $sgpr2 %2:_(s64) = G_INSERT %0, %1, 0 @@ -178,9 +178,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[COPY2]], [[COPY3]](s32), 0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:vgpr(s64) = G_INSERT [[PRED_COPY]], [[PRED_COPY1]](s32), 0 %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $agpr2 %2:_(s64) = G_INSERT %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir index 35b0d3064abd2..766b0ae472e05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-trunc.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_TRUNC [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC_TRUNC %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 7058451127e23..38f4689c6caf7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -117,7 +117,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -139,7 +139,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -160,7 +160,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -187,7 +187,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -278,7 +278,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -299,7 +299,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT 
i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -321,7 +321,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -342,7 +342,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -363,7 +363,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -390,7 +390,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -497,8 +497,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) @@ -514,8 +514,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p3) :: (load (s32), addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) @@ -532,8 +532,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -550,8 +550,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -568,8 +568,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -586,8 +586,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) ... @@ -620,8 +620,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s32), align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) ... @@ -638,8 +638,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p4) :: (load (s32), align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) ... 
@@ -656,8 +656,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p5) = PRED_COPY [[COPY]](p5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p5) :: (load (s32), addrspace 5) %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir index b2b650a6c6269..774f947db8786 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-lshr.mir @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -233,8 +233,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: 
[[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -254,8 +254,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir index d1cc33594f397..8cd221dd5f295 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -21,9 +21,9 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] @@ -86,16 +86,16 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UMULH]](s32) ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO 
[[PRED_COPY2]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY3]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vcc(s1) = PRED_COPY [[UADDE1]](s1) ; GFX9MI-LABEL: name: mad_u64_u32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} @@ -104,9 +104,9 @@ body: | ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[PRED_COPY1]], [[MV]] ; GFX10-LABEL: name: mad_u64_u32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -117,13 +117,13 @@ body: | ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UMULH]](s32) ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY1]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[UADDE1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -149,9 +149,9 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = 
G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -177,8 +177,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[PRED_COPY]](s32), [[COPY1]], [[MV]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -204,9 +204,9 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -232,8 +232,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[PRED_COPY]], [[MV]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr1 @@ -259,8 +259,8 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[MV]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr1 @@ -313,9 +313,9 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; 
GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] @@ -393,21 +393,21 @@ body: | ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX8-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX8-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SMULH]](s32) ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY2]], [[UV]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY3]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX8-NEXT: [[PRED_COPY4:%[0-9]+]]:vcc(s1) = PRED_COPY [[XOR1]](s1) ; GFX9MI-LABEL: name: mad_i64_i32_ssv ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX9MI-NEXT: {{ $}} @@ -416,9 +416,9 @@ body: | ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]] + ; GFX9MI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9MI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9MI-NEXT: 
[[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[PRED_COPY]](s32), [[PRED_COPY1]], [[MV]] ; GFX10-LABEL: name: mad_i64_i32_ssv ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -432,16 +432,16 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32) - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[MUL]](s32) + ; GFX10-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SMULH]](s32) ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]] - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[UV]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY1]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1) + ; GFX10-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[XOR1]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -466,9 +466,9 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32) @@ -516,8 +516,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s64) = G_CONSTANT i64 0 @@ -540,9 +540,9 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8-NEXT: 
[[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX8-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec ; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]] @@ -593,8 +593,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s64) = G_CONSTANT i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir index 6cf0a826977fa..1f772837625a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir @@ -78,9 +78,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 @@ -100,9 +100,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir index e4f6ade86e8d9..a854d1effc8dd 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mul.mir @@ -31,8 +31,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_MUL %0, %1 @@ -50,8 +50,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_MUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir index 07db1fa2f0c09..650ff07a79a07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_OR %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_OR %0, %1 @@ -120,10 +120,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[ICMP]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $vgpr0 @@ -150,10 +150,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: 
[[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[PRED_COPY1]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -230,10 +230,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[PRED_COPY1]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -423,8 +423,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -452,8 +452,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -482,10 +482,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: 
[[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -516,10 +516,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[UV]], [[UV2]] @@ -775,8 +775,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[PRED_COPY]], [[COPY1]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_OR %0, %1 @@ -794,8 +794,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir index 8785bd0e0335d..9ee7c0098cdce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir @@ -258,22 +258,22 @@ body: | ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: @@ -289,22 +289,22 @@ body: | ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -347,8 +347,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; 
FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -360,14 +360,14 @@ body: | ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: @@ -378,8 +378,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -391,14 +391,14 @@ body: | ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ 
-441,8 +441,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -452,14 +452,14 @@ body: | ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: @@ -470,8 +470,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -481,14 +481,14 @@ body: | ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -738,10 +738,10 @@ body: | ; FAST-NEXT: bb.2: ; 
FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: @@ -771,10 +771,10 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -836,9 +836,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: @@ -868,9 +868,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -913,8 +913,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -925,13 +925,13 @@ body: | ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: @@ -942,8 +942,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -954,13 +954,13 @@ body: | ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1007,22 +1007,22 @@ body: | ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: 
[[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: @@ -1037,22 +1037,22 @@ body: | ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1095,8 +1095,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = 
PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1116,9 +1116,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: @@ -1129,8 +1129,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1150,9 +1150,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1206,8 +1206,8 @@ body: | ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1216,9 +1216,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; 
FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: @@ -1240,8 +1240,8 @@ body: | ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1250,9 +1250,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1313,9 +1313,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: @@ -1344,9 +1344,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1407,10 
+1407,10 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: @@ -1439,10 +1439,10 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1503,9 +1503,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: @@ -1534,9 +1534,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1676,7 +1676,7 @@ body: | ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), 
[[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} @@ -1685,16 +1685,16 @@ body: | ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 ; FAST-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) ; FAST-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 456 - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_result_scc_scc_sbranch ; GREEDY: bb.0: @@ -1710,7 +1710,7 @@ body: | ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} @@ -1719,11 +1719,11 @@ body: | ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[COPY4]](s1), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 123 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 456 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[C1]], [[C2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir index 8a81d1b6060fe..16c2c72340632 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi.mir @@ -340,19 +340,19 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $sgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_ss_vcc_sbranch @@ -364,19 +364,19 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $sgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -419,19 +419,19 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_sv_vcc_sbranch @@ -443,19 +443,19 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - 
; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -498,19 +498,19 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_vs_vcc_sbranch @@ -522,19 +522,19 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -577,19 +577,19 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) 
- ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; FAST-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; FAST-NEXT: $vgpr0 = COPY [[PHI]](s32) ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 ; GREEDY-LABEL: name: phi_s32_vv_vcc_sbranch @@ -601,19 +601,19 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[PRED_COPY]] ; GREEDY-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY4]](s32), %bb.1 + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[COPY]](s32), %bb.0, [[COPY3]](s32), %bb.1 ; GREEDY-NEXT: $vgpr0 = COPY [[PHI]](s32) ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -897,22 +897,22 @@ body: | ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch ; GREEDY: bb.0: @@ -928,22 +928,22 @@ body: | ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -986,8 +986,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -999,14 +999,14 @@ body: | ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: 
[[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch ; GREEDY: bb.0: @@ -1017,8 +1017,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1030,14 +1030,14 @@ body: | ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1080,8 +1080,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1091,14 +1091,14 @@ body: | ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), 
%bb.0, [[ICMP2]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch ; GREEDY: bb.0: @@ -1109,8 +1109,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1120,14 +1120,14 @@ body: | ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1377,10 +1377,10 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch ; GREEDY: bb.0: @@ -1410,10 +1410,10 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1475,9 +1475,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch ; GREEDY: bb.0: @@ -1507,9 +1507,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1552,8 +1552,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1564,13 +1564,13 @@ body: | ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, 
[[PRED_COPY1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch ; GREEDY: bb.0: @@ -1581,8 +1581,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1593,13 +1593,13 @@ body: | ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC1]](s1) ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[COPY4]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PRED_COPY1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1646,22 +1646,22 @@ body: | ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; FAST-NEXT: G_BR %bb.2 ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.2: - ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; FAST-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: 
[[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch ; GREEDY: bb.0: @@ -1676,22 +1676,22 @@ body: | ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY1]] ; GREEDY-NEXT: G_BR %bb.2 ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.2: - ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[COPY3]](s1), %bb.0, [[ICMP1]](s1), %bb.1 - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY5]], [[COPY6]] + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[PRED_COPY]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[PRED_COPY2]], [[PRED_COPY3]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1734,8 +1734,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; FAST-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; FAST-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1755,9 +1755,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch ; GREEDY: bb.0: @@ -1768,8 +1768,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; 
GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) ; GREEDY-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) @@ -1789,9 +1789,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[SELECT]](s32), %bb.0, [[ANYEXT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1845,8 +1845,8 @@ body: | ; FAST-NEXT: bb.1: ; FAST-NEXT: successors: %bb.2(0x80000000) ; FAST-NEXT: {{ $}} - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; FAST-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; FAST-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1855,9 +1855,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; FAST-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch ; GREEDY: bb.0: @@ -1879,8 +1879,8 @@ body: | ; GREEDY-NEXT: bb.1: ; GREEDY-NEXT: successors: %bb.2(0x80000000) ; GREEDY-NEXT: {{ $}} - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[PRED_COPY]] ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[C2]] @@ -1889,9 +1889,9 @@ body: | ; GREEDY-NEXT: bb.2: 
; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[SELECT]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC2:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC2]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC2]](s1) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY1]](s1), [[PRED_COPY2]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT1]](s32) bb.0: successors: %bb.1, %bb.2 @@ -1952,9 +1952,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_s_sbranch ; GREEDY: bb.0: @@ -1983,9 +1983,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -2046,10 +2046,10 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_s_v_sbranch ; GREEDY: bb.0: @@ -2078,10 +2078,10 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; 
GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 @@ -2142,9 +2142,9 @@ body: | ; FAST-NEXT: bb.2: ; FAST-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; FAST-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; FAST-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) ; GREEDY-LABEL: name: phi_s1_v_v_sbranch ; GREEDY: bb.0: @@ -2173,9 +2173,9 @@ body: | ; GREEDY-NEXT: bb.2: ; GREEDY-NEXT: [[PHI:%[0-9]+]]:vgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 ; GREEDY-NEXT: [[TRUNC3:%[0-9]+]]:vgpr(s1) = G_TRUNC [[PHI]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC3]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC3]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY]] ; GREEDY-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) bb.0: successors: %bb.1, %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir index f05909eb7ea0c..afd072fec6369 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CONSTANT i64 1 %2:_(p1) = G_PTR_ADD %0, %1 @@ -73,8 +73,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD 
[[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $sgpr0_sgpr1 %2:_(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir index 52d12455ce690..9c6dffe1b0d46 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptrmask.mir @@ -53,8 +53,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[C]](s64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CONSTANT i64 1 %2:_(p1) = G_PTRMASK %0, %1 @@ -73,8 +73,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[COPY2]](s64) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY1]](s64) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p1) = G_PTRMASK [[COPY]], [[PRED_COPY]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $sgpr0_sgpr1 %2:_(p1) = G_PTRMASK %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir index 3b1ead62e375d..c084abe8ad3af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sadde.mir @@ -57,9 +57,9 @@ body: | ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: sadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ body: | ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ body: | ; 
FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: sadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: sadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SADDE:%[0-9]+]]:vgpr(s32), [[SADDE1:%[0-9]+]]:vcc(s1) = G_SADDE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir index b752c7af7c9fa..d34e6f6502a88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir @@ -43,9 +43,9 @@ body: | ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 10 @@ -68,9 +68,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -153,12 +153,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:vgpr(s32) = G_ASHR [[SBFX]], [[C3]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SBFX]](s32), [[ASHR1]](s32) @@ -184,9 +184,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 @@ -214,8 +214,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) 
= COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] @@ -245,8 +245,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[PRED_COPY]], [[COPY1]](s32), [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[SBFX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir index 73cd344aedadb..14ab4912eae7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir @@ -53,9 +53,9 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -65,9 +65,9 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -92,9 +92,9 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: 
[[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY3]], [[PRED_COPY1]] ; GREEDY-LABEL: name: select_s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,9 +104,9 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY3]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY3]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -130,8 +130,8 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY2]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -141,8 +141,8 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY2]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY2]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -165,9 +165,9 @@ body: | ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: select_s32_vcc_ss ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -176,9 +176,9 @@ body: | ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), 
[[COPY3]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -201,8 +201,8 @@ body: | ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[COPY3]] ; GREEDY-LABEL: name: select_s32_vcc_sv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} @@ -211,8 +211,8 @@ body: | ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY4]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[PRED_COPY]], [[COPY3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -235,8 +235,8 @@ body: | ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[PRED_COPY]] ; GREEDY-LABEL: name: select_s32_vcc_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; GREEDY-NEXT: {{ $}} @@ -245,8 +245,8 @@ body: | ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[PRED_COPY]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -338,11 +338,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -353,11 +353,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -383,11 +383,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_svs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -398,11 +398,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = 
G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -427,11 +427,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_s64_svv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -442,11 +442,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](s64) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -667,11 +667,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: 
[[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -682,11 +682,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -712,11 +712,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -727,11 +727,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -756,11 +756,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_v2s32_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -771,11 +771,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -996,11 +996,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; 
GREEDY-LABEL: name: select_v4s16_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1011,11 +1011,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1041,11 +1041,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; GREEDY-LABEL: name: select_v4s16_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1056,11 +1056,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1085,11 +1085,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) ; GREEDY-LABEL: name: select_v4s16_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1100,11 +1100,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[COPY3]](<4 x s16>) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(<2 x s16>) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[SELECT]](<2 x s16>), [[SELECT1]](<2 x s16>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1362,11 +1362,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = 
G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_sv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1377,11 +1377,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1407,11 +1407,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_vs ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 @@ -1422,11 +1422,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1451,11 +1451,11 @@ body: | ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; FAST-NEXT: 
[[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; FAST-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; FAST-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; FAST-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; FAST-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) ; GREEDY-LABEL: name: select_p1_scc_vv ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -1466,11 +1466,11 @@ body: | ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p1) ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV]], [[UV2]] - ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[UV1]], [[UV3]] + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV]], [[UV2]] + ; GREEDY-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[UV1]], [[UV3]] ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -1693,8 +1693,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_vgpr_vv ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GREEDY-NEXT: {{ $}} @@ -1702,8 +1702,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -1724,10 +1724,10 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC 
[[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: select_s32_vgpr_ss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -1735,10 +1735,10 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -1759,8 +1759,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_sgpr_vv ; GREEDY: liveins: $sgpr0, $vgpr0, $vgpr1 ; GREEDY-NEXT: {{ $}} @@ -1768,8 +1768,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -1790,9 +1790,9 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[PRED_COPY1]] ; 
GREEDY-LABEL: name: select_s32_sgpr_vs ; GREEDY: liveins: $sgpr0, $vgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -1800,9 +1800,9 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[COPY1]], [[PRED_COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $sgpr1 @@ -1823,9 +1823,9 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY2]] ; GREEDY-LABEL: name: select_s32_sgpr_sv ; GREEDY: liveins: $sgpr0, $sgpr0, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -1833,9 +1833,9 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY2]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PRED_COPY]](s1), [[PRED_COPY1]], [[COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir index c4f490262fa56..5f935d8f395e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir @@ -183,10 +183,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[PRED_COPY]], [[C]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[ASHR]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = 
G_SEXT_INREG %0, 32 @@ -207,9 +207,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 33 @@ -230,9 +230,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 35 @@ -253,9 +253,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[UV]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[PRED_COPY]], 31 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[SEXT_INREG]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SEXT_INREG %0, 63 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index e126e001f0b37..f69e42c95ad0c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -47,10 +47,10 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[PRED_COPY]], [[C]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[ASHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_SEXT %0 ... 
@@ -179,8 +179,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[SELECT]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[PRED_COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir index 342a4581018d9..a8bea5ea30495 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -31,8 +31,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -49,8 +49,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -67,8 +67,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... 
@@ -84,8 +84,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... @@ -102,8 +102,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir index 45206492483dc..e10ced8018b35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shl.mir @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -55,8 +55,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -126,8 +126,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[COPY2]], [[TRUNC1]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = PRED_COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[PRED_COPY]], [[TRUNC1]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -151,8 +151,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY2]](s16) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s16) = 
PRED_COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[PRED_COPY]](s16) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -229,8 +229,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY2]], [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[PRED_COPY]], [[COPY1]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -250,8 +250,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[PRED_COPY]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir index e2198e66cb298..d49f9bdc1d19a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:vgpr(s32) = G_SITOFP [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_SITOFP %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir index 51da926273ada..2d42311522d53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SMAX %0, %1 @@ -54,8 +54,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -213,8 +213,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -235,8 +235,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir index aa5d854a7a23e..3b11ae6b797dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir @@ -36,8 +36,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -58,8 +58,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -216,8 +216,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -238,8 +238,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir index d663079d9d450..20cc51174ce65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir @@ -18,9 +18,9 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-LABEL: name: smulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -45,15 +45,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[COPY1]] ; GFX9-LABEL: name: smulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY2]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[PRED_COPY]], [[COPY1]] %0:_(s32) = 
COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SMULH %0, %1 @@ -72,15 +72,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[PRED_COPY]] ; GFX9-LABEL: name: smulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SMULH %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir index 244c07e62b5b1..3b7542e41998b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ssube.mir @@ -57,9 +57,9 @@ body: | ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: ssube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ body: | ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; 
FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: ssube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: ssube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[SSUBE:%[0-9]+]]:vgpr(s32), [[SSUBE1:%[0-9]+]]:vcc(s1) = G_SSUBE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index 918f576eae833..3878d7d446469 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -31,8 +31,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SUB %0, %1 @@ -50,8 +50,8 @@ body: | ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir index 22dcc5ee78b51..3f43e08d7faee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uadde.mir @@ -56,9 +56,9 @@ body: | ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: uadde_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -68,9 +68,9 @@ body: | ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -92,10 +92,10 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: uadde_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -103,10 +103,10 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY 
$vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -128,8 +128,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: uadde_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -137,8 +137,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir index 93357880a0a20..1e798176492e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uaddo.mir @@ -33,8 +33,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 @@ -52,8 +52,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY 
$vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir index 5d962d93f5b68..ecc609bbb86eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir @@ -43,9 +43,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 10 @@ -68,9 +68,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY2]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[PRED_COPY]](s32), [[PRED_COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -153,12 +153,12 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UBFX]](s32), [[C2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 @@ -182,9 +182,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C1]](s32) + ; 
CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 @@ -212,8 +212,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY3]], [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s64) = PRED_COPY [[COPY]](s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[PRED_COPY]], [[COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] @@ -243,8 +243,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[PRED_COPY]], [[COPY1]](s32), [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[UBFX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir index e2117318206a1..a90ff9d2bf4bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY1]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[PRED_COPY]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_UITOFP %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir index a6aa2eb7c4d5c..c1dabf86f8234 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir @@ -36,8 +36,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -58,8 +58,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -218,8 +218,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -240,8 +240,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir index 734cbc02ca2da..f4cac7cfe4f18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir @@ -37,8 +37,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -60,8 +60,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 @@ -222,8 +222,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[PRED_COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 @@ -244,8 +244,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[PRED_COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir index cf93b41c8cc3f..9a5411baa6f80 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir @@ -18,9 +18,9 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY3]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[PRED_COPY1]] ; GFX9-LABEL: name: umulh_s32_ss ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} @@ -45,15 +45,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[COPY1]] ; GFX9-LABEL: name: umulh_s32_sv ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[PRED_COPY]], [[COPY1]] %0:_(s32) = 
COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_UMULH %0, %1 @@ -72,15 +72,15 @@ body: | ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] + ; GFX6-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX6-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[PRED_COPY]] ; GFX9-LABEL: name: umulh_s32_vs ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]] + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_UMULH %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir index 8159f1b982c36..fde87efed6342 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir @@ -43,7 +43,7 @@ body: | ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY %in_addr(p1) + ; GFX1010-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY %in_addr(p1) ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) @@ -56,20 +56,20 @@ body: | ; GFX1010-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from unknown-address + 48, align 4, addrspace 1) ; GFX1010-NEXT: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out_addr(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr(p1) + ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[PRED_COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) - ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_16(p1) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[PRED_COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst32:sgpr(s64) = G_CONSTANT i64 32 ; GFX1010-NEXT: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) - ; GFX1010-NEXT: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_32(p1) - ; 
GFX1010-NEXT: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_32(p1) + ; GFX1010-NEXT: G_STORE %load8_11(<4 x s32>), [[PRED_COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: %cst48:sgpr(s64) = G_CONSTANT i64 48 ; GFX1010-NEXT: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) - ; GFX1010-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_48(p1) - ; GFX1010-NEXT: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_addr_plus_48(p1) + ; GFX1010-NEXT: G_STORE %load12_15(<4 x s32>), [[PRED_COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %in_addr:_(p1) = COPY $sgpr0_sgpr1 %out_addr:_(p1) = COPY $sgpr2_sgpr3 @@ -117,19 +117,19 @@ body: | ; GFX1010-NEXT: {{ $}} ; GFX1010-NEXT: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX1010-NEXT: %out:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX1010-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %ptr(p4) + ; GFX1010-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY %ptr(p4) ; GFX1010-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (<4 x s32>), align 1, addrspace 4) ; GFX1010-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) ; GFX1010-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, align 1, addrspace 4) ; GFX1010-NEXT: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX1010-NEXT: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - ; GFX1010-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out(p1) - ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(p1) = PRED_COPY %out(p1) + ; GFX1010-NEXT: G_STORE %load0_3(<4 x s32>), [[PRED_COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010-NEXT: %cst_16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010-NEXT: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) - ; GFX1010-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_plus_16(p1) - ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) + ; GFX1010-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr(p1) = PRED_COPY %out_plus_16(p1) + ; GFX1010-NEXT: G_STORE %load4_7(<4 x s32>), [[PRED_COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010-NEXT: S_ENDPGM 0 %ptr:_(p4) = COPY $sgpr0_sgpr1 %out:_(p1) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir index 8ea6eb6633ef3..ad01ac6d8cefb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usube.mir @@ -57,9 +57,9 @@ body: | ; FAST-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY 
[[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] ; GREEDY-LABEL: name: usube_s32_vss ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 ; GREEDY-NEXT: {{ $}} @@ -69,9 +69,9 @@ body: | ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[COPY4]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[PRED_COPY]], [[PRED_COPY1]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -93,10 +93,10 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; FAST-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; FAST-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; FAST-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; FAST-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] ; GREEDY-LABEL: name: usube_s32_ssv ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 ; GREEDY-NEXT: {{ $}} @@ -104,10 +104,10 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; GREEDY-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; GREEDY-NEXT: [[PRED_COPY2:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[PRED_COPY]], [[PRED_COPY1]], [[PRED_COPY2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -129,8 +129,8 @@ body: | ; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; FAST-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; FAST-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; FAST-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; FAST-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; FAST-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), 
[[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[PRED_COPY]] ; GREEDY-LABEL: name: usube_s32_vvs ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 ; GREEDY-NEXT: {{ $}} @@ -138,8 +138,8 @@ body: | ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32) - ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[COPY3]] + ; GREEDY-NEXT: [[PRED_COPY:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; GREEDY-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir index 714178e6e8337..63be15e6993b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-usubo.mir @@ -33,8 +33,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 @@ -52,8 +52,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index c0f72eccf5249..218d75ecf6087 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -22,24 +22,24 @@ body: | ; CHECK-NEXT: %agpr:agpr(s32) = COPY $agpr0 ; CHECK-NEXT: %voffset:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: %zero:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY %zero(s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY %zero(s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY %agpr(s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY %agpr(s32) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2 - ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec - ; CHECK-NEXT: 
[[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]] + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[PRED_COPY1]](s32), implicit $exec + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) + ; CHECK-NEXT: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[PRED_COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; CHECK-NEXT: {{ $}} @@ -75,14 +75,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<8 x s32>) = PRED_COPY [[COPY]](<8 x s32>) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr_32(s32), [[UV1:%[0-9]+]]:vgpr_32(s32), [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32), [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32), [[UV6:%[0-9]+]]:vgpr_32(s32), [[UV7:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s32>) ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV1]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec @@ -92,7 +92,7 @@ body: | ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV6]](s32), implicit $exec ; CHECK-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UV7]](s32), implicit $exec ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>) + ; 
CHECK-NEXT: [[UV8:%[0-9]+]]:vgpr(s64), [[UV9:%[0-9]+]]:vgpr(s64), [[UV10:%[0-9]+]]:vgpr(s64), [[UV11:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[PRED_COPY]](<8 x s32>) ; CHECK-NEXT: [[UV12:%[0-9]+]]:sgpr(s64), [[UV13:%[0-9]+]]:sgpr(s64), [[UV14:%[0-9]+]]:sgpr(s64), [[UV15:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV12]](s64), [[UV8]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV13]](s64), [[UV9]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir index c6d11b3b41e41..7f3f46338d408 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir @@ -360,22 +360,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -392,22 +392,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = 
PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -424,22 +424,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -456,22 +456,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 
(s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -488,22 +488,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i8_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i8_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) @@ -520,22 +520,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i16_align2 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY 
[[COPY]](p1) - ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i16_align2 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p1) :: (invariant load (s16), addrspace 4) ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) @@ -552,22 +552,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: local_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: local_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) @@ -584,22 +584,22 @@ body: | ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX8-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX8-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: private_load_i8_align4 ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD 
[[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX9-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: private_load_i8_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir index 930a1d4e11537..f7a8f7e58893a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir @@ -32,8 +32,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[PRED_COPY]], [[COPY1]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_XOR %0, %1 @@ -51,8 +51,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[COPY]], [[PRED_COPY]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_XOR %0, %1 @@ -120,10 +120,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY3]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY1]] ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $vgpr0 @@ -150,10 +150,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = 
G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PRED_COPY]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[PRED_COPY1]], [[ICMP1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -230,10 +230,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY3]], [[ICMP]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[PRED_COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[TRUNC]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[PRED_COPY1]], [[ICMP]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 @@ -423,8 +423,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] @@ -452,8 +452,8 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[PRED_COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] @@ -482,10 +482,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), 
[[COPY3]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY1]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] @@ -516,10 +516,10 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[PRED_COPY1]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(s32) = G_XOR [[UV]], [[UV2]] @@ -775,8 +775,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[PRED_COPY]], [[COPY1]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_XOR %0, %1 @@ -794,8 +794,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(<2 x s16>) = PRED_COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[PRED_COPY]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_XOR %0, %1 @@ -831,11 +831,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[PRED_COPY]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C1]](s1) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[COPY2]] + ; 
CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vcc(s1) = PRED_COPY [[C1]](s1) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[PRED_COPY1]] ; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index 059b72f63d899..ff60e96838f5c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -47,9 +47,9 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(s32) = PRED_COPY [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[PRED_COPY]](s32), [[C]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_ZEXT %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir index 6b4928e832106..698530e3b387d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir @@ -13,8 +13,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -31,8 +31,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -49,8 +49,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... 
@@ -67,8 +67,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p4) = PRED_COPY [[COPY]](p4) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... @@ -84,8 +84,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... @@ -102,8 +102,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p3) = PRED_COPY [[COPY]](p3) + ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[PRED_COPY]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index 3ef20a453f4ef..3b3a5e1fe4fa9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -112,8 +112,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... @@ -129,8 +129,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... 
@@ -146,8 +146,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (volatile invariant load (s32) from %ir.ptr1) ... @@ -163,8 +163,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (invariant load acquire (s32) from %ir.ptr1) ... @@ -180,8 +180,8 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr(p1) = PRED_COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PRED_COPY]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index 62c4a5a1c6175..9a3d5b27b2b70 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s -; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s ; Test with gfx803 so that ; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require -; the queue ptr. Tests with code object v3 to test +; the queue ptr. Tests with code object v3 and above to test ; llvm.trap/llvm.debugtrap that require the queue ptr. 
@@ -17,11 +17,12 @@ define void @parent_func_missing_inputs() #0 { ; FIXEDABI-LABEL: parent_func_missing_inputs: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-NEXT: s_or_saveexec_b64 s[16:17], -1 -; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; FIXEDABI-NEXT: s_mov_b64 exec, s[16:17] -; FIXEDABI-NEXT: v_writelane_b32 v40, s33, 2 +; FIXEDABI-NEXT: s_mov_b32 s16, s33 ; FIXEDABI-NEXT: s_mov_b32 s33, s32 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[18:19], -1 +; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; FIXEDABI-NEXT: s_mov_b64 exec, s[18:19] +; FIXEDABI-NEXT: v_writelane_b32 v40, s16, 2 ; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0 ; FIXEDABI-NEXT: s_addk_i32 s32, 0x400 ; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 @@ -31,11 +32,12 @@ define void @parent_func_missing_inputs() #0 { ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] ; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 ; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] ; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 -; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 -; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1 -; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5] +; FIXEDABI-NEXT: s_mov_b32 s33, s4 ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) ; FIXEDABI-NEXT: s_setpc_b64 s[30:31] call void @requires_all_inputs() @@ -274,16 +276,16 @@ define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i3 ; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr: ; FIXEDABI-SDAG: ; %bb.0: ; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x40 +; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0 ; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 -; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v2, s5 -; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc -; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, s4 -; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 -; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v0, vcc ; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 1 +; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v4, s4 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc ; FIXEDABI-SDAG-NEXT: flat_store_dword v[2:3], v0 ; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) @@ -295,12 +297,16 @@ define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i3 ; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr: ; FIXEDABI-GISEL: ; %bb.0: ; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x40 +; FIXEDABI-GISEL-NEXT: s_movk_i32 s4, 0xc0 +; FIXEDABI-GISEL-NEXT: s_mov_b32 s5, 0 +; FIXEDABI-GISEL-NEXT: s_load_dword s6, s[4:5], 0x0 +; FIXEDABI-GISEL-NEXT: s_movk_i32 s4, 0xc4 +; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 0x0 ; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 ; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc ; 
FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v0, s5 -; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc +; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v3, s6 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, s4 ; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 ; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc @@ -320,16 +326,29 @@ define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i3 } define void @is_shared_requires_queue_ptr(i8* %ptr) #0 { -; FIXEDABI-LABEL: is_shared_requires_queue_ptr: -; FIXEDABI: ; %bb.0: -; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x40 -; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) -; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 -; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; FIXEDABI-NEXT: flat_store_dword v[0:1], v0 -; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[30:31] +; FIXEDABI-SDAG-LABEL: is_shared_requires_queue_ptr: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0 +; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-GISEL-LABEL: is_shared_requires_queue_ptr: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc4 +; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 0x0 +; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31] %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr) %zext = zext i1 %is.shared to i32 store volatile i32 %zext, i32 addrspace(1)* undef @@ -337,16 +356,29 @@ define void @is_shared_requires_queue_ptr(i8* %ptr) #0 { } define void @is_private_requires_queue_ptr(i8* %ptr) #0 { -; FIXEDABI-LABEL: is_private_requires_queue_ptr: -; FIXEDABI: ; %bb.0: -; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x44 -; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) -; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 -; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; FIXEDABI-NEXT: flat_store_dword v[0:1], v0 -; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[30:31] +; FIXEDABI-SDAG-LABEL: is_private_requires_queue_ptr: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[4:5], 0x0 +; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-GISEL-LABEL: is_private_requires_queue_ptr: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc0 +; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[4:5], 
0x0 +; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31] %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr) %zext = zext i1 %is.private to i32 store volatile i32 %zext, i32 addrspace(1)* undef @@ -354,11 +386,21 @@ define void @is_private_requires_queue_ptr(i8* %ptr) #0 { } define void @trap_requires_queue() #0 { -; FIXEDABI-LABEL: trap_requires_queue: -; FIXEDABI: ; %bb.0: -; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FIXEDABI-NEXT: s_mov_b64 s[0:1], s[6:7] -; FIXEDABI-NEXT: s_trap 2 +; FIXEDABI-SDAG-LABEL: trap_requires_queue: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; FIXEDABI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_trap 2 +; +; FIXEDABI-GISEL-LABEL: trap_requires_queue: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_mov_b64 s[4:5], 0xc8 +; FIXEDABI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_trap 2 call void @llvm.trap() unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir index 4b9506af9f407..1c09898523743 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -931,8 +931,9 @@ body: | ; GFX908: liveins: $agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr255, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr255, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec @@ -966,12 +967,13 @@ body: | ; GFX908-LABEL: name: a4_to_a4 ; GFX908: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec - ; GFX908-NEXT: 
$vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 ; GFX90A-LABEL: name: a4_to_a4 ; GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF @@ -1002,12 +1004,13 @@ body: | ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 ; GFX90A-LABEL: name: a4_to_a4_overlap ; GFX90A: liveins: $agpr0_agpr1_agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index b139abf6d7a7d..8693cffc94ffe 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -20,7 +20,7 @@ body: | ; GFX908-LABEL: name: agpr32_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -891,10 +891,10 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 
0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -1134,9 +1134,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr32_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-FLATSCR-NEXT: liveins: 
$vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -1445,7 +1445,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2006,11 +2006,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -2278,7 +2278,7 @@ body: | ; GFX908-LABEL: name: agpr64_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -3151,11 +3151,11 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: 
bb.1: @@ -3395,9 +3395,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr64_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3708,7 +3708,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4269,11 +4269,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -4541,7 +4541,7 @@ body: | ; GFX908-LABEL: name: agpr96_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -5416,12 +5416,12 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) - ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -5661,9 +5661,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr96_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -5976,7 +5976,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 
0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -6537,11 +6537,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -6809,7 +6809,7 @@ body: | ; GFX908-LABEL: name: agpr32_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 + ; GFX908-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -7680,10 +7680,10 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -7923,9 +7923,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr32_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 + ; GFX908-FLATSCR-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -8234,7 +8234,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr0, $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -8795,11 +8795,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -9066,7 +9066,7 @@ body: | ; GFX908-LABEL: name: agpr64_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 + ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -9939,11 +9939,11 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -10183,9 +10183,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr64_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; 
GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 + ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -10496,7 +10496,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -11057,11 +11057,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: @@ -11327,7 +11327,7 @@ body: | ; GFX908-LABEL: name: agpr96_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 + ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -12202,12 +12202,12 @@ body: | ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 8904, implicit $exec - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) - ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5) + ; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1: @@ -12447,9 +12447,9 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr96_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 + ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-FLATSCR-NEXT: {{ $}} - ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 
0x36, 0x24, 0x36, 0xe1 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -12762,7 +12762,7 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} - ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr0_lo16 ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -13323,11 +13323,11 @@ body: | ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xff, 0x19, 0x0d, 0x90, 0xff, 0x19, 0x16, 0xe4, 0x00, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8904, $vgpr0, implicit $exec - ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) - ; GFX90A-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5) + ; GFX90A-FLATSCR-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 1d1048ada8709..946f192e911ff 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -231,5 +231,5 @@ attributes #1 = { nounwind } ;. 
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index f5760793828b3..7361f0a995b4d 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -9,26 +9,33 @@ ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} +; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; CI-DAG: s_cmp_lg_u32 [[PTR]], -1 -; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0 -; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0 +; CI-DAG: s_cselect_b64 vcc, -1, 0 +; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) ; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]] ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base ; GFX9: s_cmp_lg_u32 [[PTR]], -1 -; GFX9-DAG: 
s_cselect_b32 s[[HI:[0-9]+]], [[SSRC_SHARED_BASE]], 0 -; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0 +; GFX9: s_cselect_b64 vcc, -1, 0 +; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] ; At most 2 digits. Make sure src_shared_base is not counted as a high ; number SGPR. -; HSA: NumSgprs: {{[0-9]+}} +; CI: NumSgprs: {{[0-9][0-9]+}} +; GFX9: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { %stof = addrspacecast i32 addrspace(3)* %ptr to i32* store volatile i32 7, i32* %stof @@ -68,26 +75,33 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 { ; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}} +; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; CI-DAG: s_cmp_lg_u32 [[PTR]], -1 -; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0 -; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0 +; CI-DAG: s_cselect_b64 vcc, -1, 0 +; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} ; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16 +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]] ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; GFX9: s_cmp_lg_u32 [[PTR]], -1 -; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[SSRC_PRIVATE_BASE]], 0 -; GFX9: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0 +; GFX9: s_cselect_b64 vcc, -1, 0 +; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc +; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]] -; HSA: NumSgprs: {{[0-9]+}} +; CI: NumSgprs: {{[0-9][0-9]+}} +; GFX9: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 { %stof = addrspacecast i32 addrspace(5)* %ptr to i32* store volatile i32 7, i32* %stof @@ -141,16 +155,14 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(i32 addrspace(4) ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]] -; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} -; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec -; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1 -; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]] +; CI-DAG: v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} +; CI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] +; CI-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0 ; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1 ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] -; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]] -; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]] +; HSA: ds_write_b32 [[CASTPTR]], v[[K]] define amdgpu_kernel void 
@use_flat_to_group_addrspacecast(i32* %ptr) #0 { %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* store volatile i32 0, i32 addrspace(3)* %ftos @@ -163,19 +175,14 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { ; HSA: enable_sgpr_queue_ptr = 0 ; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]] -; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} -; CI-DAG v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] -; CI-DAG v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] -; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} -; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec -; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1 -; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]] +; CI-DAG: v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}} +; CI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] +; CI-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0 ; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1 ; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]] -; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} -; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} +; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* store volatile i32 0, i32 addrspace(5)* %ftos @@ -427,3 +434,6 @@ attributes #0 = { nounwind } attributes #1 = { nounwind convergent } attributes #2 = { nounwind readnone } attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 2a7e2e657e8eb..671e0a8886c1c 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -13,133 +13,133 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] a[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; GFX908-NEXT: s_nop 
1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; 
GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; 
GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse @@ -522,258 +522,266 @@ define void @v32_asm_def_use(float %v0, float %v1) #0 { define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5) #3 { ; GFX908-LABEL: introduced_copy_to_sgpr: ; GFX908: ; %bb.0: ; %bb -; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc +; GFX908-NEXT: global_load_ushort v24, v[0:1], off glc ; GFX908-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX908-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX908-NEXT: s_load_dword s9, s[4:5], 0x18 -; GFX908-NEXT: s_mov_b32 s8, 0 -; 
GFX908-NEXT: s_mov_b32 s5, s8 +; GFX908-NEXT: s_load_dwordx2 s[10:11], s[4:5], 0x10 +; GFX908-NEXT: v_mov_b32_e32 v1, 0 +; GFX908-NEXT: s_load_dword s5, s[4:5], 0x18 +; GFX908-NEXT: s_mov_b32 s4, 0 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX908-NEXT: s_sub_i32 s4, 0, s3 -; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s9 -; GFX908-NEXT: v_mov_b32_e32 v19, 0 -; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GFX908-NEXT: v_mov_b32_e32 v0, 0 -; GFX908-NEXT: v_mov_b32_e32 v1, 0 -; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX908-NEXT: v_readfirstlane_b32 s10, v2 -; GFX908-NEXT: s_mul_i32 s4, s4, s10 -; GFX908-NEXT: s_mul_hi_u32 s4, s10, s4 -; GFX908-NEXT: s_add_i32 s10, s10, s4 -; GFX908-NEXT: s_mul_hi_u32 s4, s2, s10 -; GFX908-NEXT: s_mul_i32 s10, s4, s3 -; GFX908-NEXT: s_sub_i32 s2, s2, s10 -; GFX908-NEXT: s_add_i32 s11, s4, 1 -; GFX908-NEXT: s_sub_i32 s10, s2, s3 -; GFX908-NEXT: s_cmp_ge_u32 s2, s3 -; GFX908-NEXT: s_cselect_b32 s4, s11, s4 -; GFX908-NEXT: s_cselect_b32 s2, s10, s2 -; GFX908-NEXT: s_add_i32 s10, s4, 1 -; GFX908-NEXT: s_cmp_ge_u32 s2, s3 -; GFX908-NEXT: s_cselect_b32 s4, s10, s4 -; GFX908-NEXT: s_lshr_b32 s9, s9, 16 -; GFX908-NEXT: s_lshl_b64 s[12:13], s[4:5], 5 -; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s9 -; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5 -; GFX908-NEXT: s_lshl_b64 s[10:11], s[6:7], 5 -; GFX908-NEXT: s_or_b32 s10, s10, 28 +; GFX908-NEXT: s_sub_i32 s6, 0, s3 +; GFX908-NEXT: s_lshl_b64 s[8:9], s[10:11], 5 +; GFX908-NEXT: s_lshr_b32 s12, s5, 16 +; GFX908-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX908-NEXT: v_cvt_f32_f16_e32 v25, s5 +; GFX908-NEXT: v_cvt_f32_f16_e32 v26, s12 +; GFX908-NEXT: s_or_b32 s8, s8, 28 +; GFX908-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX908-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX908-NEXT: v_mov_b32_e32 v6, s10 +; GFX908-NEXT: v_mov_b32_e32 v7, s11 +; GFX908-NEXT: v_mul_lo_u32 v2, s6, v0 +; GFX908-NEXT: s_lshl_b64 s[6:7], s[0:1], 5 +; GFX908-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX908-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX908-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: v_mov_b32_e32 v3, s9 +; GFX908-NEXT: v_mul_lo_u32 v4, v0, s3 +; GFX908-NEXT: v_add_u32_e32 v5, 1, v0 +; GFX908-NEXT: v_sub_u32_e32 v4, s2, v4 +; GFX908-NEXT: v_cmp_le_u32_e32 vcc, s3, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX908-NEXT: v_subrev_u32_e32 v5, s3, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX908-NEXT: v_add_u32_e32 v5, 1, v0 +; GFX908-NEXT: v_cmp_le_u32_e32 vcc, s3, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX908-NEXT: v_lshlrev_b64 v[4:5], 5, v[0:1] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_readfirstlane_b32 s5, v16 -; GFX908-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX908-NEXT: s_mul_i32 s1, s1, s5 -; GFX908-NEXT: s_mul_hi_u32 s9, s0, s5 -; GFX908-NEXT: s_mul_i32 s0, s0, s5 -; GFX908-NEXT: s_add_i32 s1, s9, s1 -; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5 +; GFX908-NEXT: v_readfirstlane_b32 s2, v24 +; GFX908-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX908-NEXT: s_mul_i32 s1, s1, s2 +; GFX908-NEXT: s_mul_hi_u32 s3, s0, s2 +; GFX908-NEXT: s_mul_i32 s0, s0, s2 +; GFX908-NEXT: s_add_i32 s1, s3, s1 +; GFX908-NEXT: s_lshl_b64 s[8:9], s[0:1], 5 ; GFX908-NEXT: s_branch .LBB3_2 ; GFX908-NEXT: .LBB3_1: ; %bb12 ; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX908-NEXT: s_add_u32 s6, s6, s4 -; GFX908-NEXT: s_addc_u32 s7, s7, 0 -; GFX908-NEXT: s_add_u32 s10, s10, s12 -; GFX908-NEXT: s_addc_u32 s11, s11, s13 +; GFX908-NEXT: 
v_add_co_u32_e32 v6, vcc, v6, v0 +; GFX908-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v7, vcc +; GFX908-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc ; GFX908-NEXT: .LBB3_2: ; %bb9 ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB3_5 Depth 2 ; GFX908-NEXT: s_cbranch_scc0 .LBB3_1 ; GFX908-NEXT: ; %bb.3: ; %bb14 ; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX908-NEXT: s_mov_b32 s9, s8 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 -; GFX908-NEXT: v_mov_b32_e32 v6, s8 -; GFX908-NEXT: v_mov_b32_e32 v8, s8 -; GFX908-NEXT: v_mov_b32_e32 v5, s9 -; GFX908-NEXT: v_mov_b32_e32 v7, s9 -; GFX908-NEXT: v_mov_b32_e32 v9, s9 -; GFX908-NEXT: v_cmp_lt_i64_e64 s[14:15], s[6:7], 0 -; GFX908-NEXT: v_mov_b32_e32 v11, v5 -; GFX908-NEXT: s_mov_b64 s[16:17], s[10:11] -; GFX908-NEXT: v_mov_b32_e32 v10, v4 +; GFX908-NEXT: v_mov_b32_e32 v8, 0 +; GFX908-NEXT: v_mov_b32_e32 v9, 0 +; GFX908-NEXT: global_load_dwordx2 v[8:9], v[8:9], off +; GFX908-NEXT: s_mov_b32 s5, s4 +; GFX908-NEXT: v_mov_b32_e32 v13, s5 +; GFX908-NEXT: v_mov_b32_e32 v15, s5 +; GFX908-NEXT: v_mov_b32_e32 v17, s5 +; GFX908-NEXT: v_mov_b32_e32 v12, s4 +; GFX908-NEXT: v_mov_b32_e32 v14, s4 +; GFX908-NEXT: v_mov_b32_e32 v16, s4 +; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], 0, v[6:7] +; GFX908-NEXT: v_mov_b32_e32 v11, v3 +; GFX908-NEXT: v_mov_b32_e32 v19, v13 +; GFX908-NEXT: v_mov_b32_e32 v10, v2 +; GFX908-NEXT: v_mov_b32_e32 v18, v12 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_readfirstlane_b32 s5, v2 -; GFX908-NEXT: v_readfirstlane_b32 s9, v3 -; GFX908-NEXT: s_add_u32 s5, s5, 1 -; GFX908-NEXT: s_addc_u32 s9, s9, 0 -; GFX908-NEXT: s_mul_hi_u32 s19, s2, s5 -; GFX908-NEXT: s_mul_i32 s20, s3, s5 -; GFX908-NEXT: s_mul_i32 s18, s2, s5 -; GFX908-NEXT: s_mul_i32 s5, s2, s9 -; GFX908-NEXT: s_add_i32 s5, s19, s5 -; GFX908-NEXT: s_add_i32 s5, s5, s20 +; GFX908-NEXT: v_readfirstlane_b32 s2, v8 +; GFX908-NEXT: v_readfirstlane_b32 s3, v9 +; GFX908-NEXT: s_add_u32 s2, s2, 1 +; GFX908-NEXT: s_addc_u32 s3, s3, 0 +; GFX908-NEXT: s_mul_hi_u32 s5, s6, s2 +; GFX908-NEXT: s_mul_i32 s11, s7, s2 +; GFX908-NEXT: s_mul_i32 s10, s6, s2 +; GFX908-NEXT: s_mul_i32 s2, s6, s3 +; GFX908-NEXT: s_add_i32 s2, s5, s2 +; GFX908-NEXT: s_add_i32 s5, s2, s11 ; GFX908-NEXT: s_branch .LBB3_5 ; GFX908-NEXT: .LBB3_4: ; %bb58 ; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2 -; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] -; GFX908-NEXT: s_add_u32 s16, s16, s0 -; GFX908-NEXT: s_addc_u32 s17, s17, s1 +; GFX908-NEXT: v_add_co_u32_sdwa v8, vcc, v8, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX908-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v9, vcc +; GFX908-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[8:9] +; GFX908-NEXT: v_mov_b32_e32 v20, s9 +; GFX908-NEXT: v_add_co_u32_e64 v10, s[2:3], s8, v10 +; GFX908-NEXT: v_addc_co_u32_e64 v11, s[2:3], v11, v20, s[2:3] ; GFX908-NEXT: s_cbranch_vccz .LBB3_1 ; GFX908-NEXT: .LBB3_5: ; %bb16 ; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1 ; GFX908-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX908-NEXT: s_add_u32 s20, s16, s18 -; GFX908-NEXT: s_addc_u32 s21, s17, s5 -; GFX908-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc +; GFX908-NEXT: v_mov_b32_e32 v21, s5 +; GFX908-NEXT: v_add_co_u32_e32 v20, vcc, s10, v10 +; GFX908-NEXT: v_addc_co_u32_e32 v21, vcc, v11, 
v21, vcc +; GFX908-NEXT: global_load_dword v28, v[20:21], off offset:-12 glc ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc +; GFX908-NEXT: global_load_dword v27, v[20:21], off offset:-8 glc ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: global_load_dword v12, v19, s[20:21] offset:-4 glc +; GFX908-NEXT: global_load_dword v22, v[20:21], off offset:-4 glc ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: global_load_dword v12, v19, s[20:21] glc +; GFX908-NEXT: global_load_dword v20, v[20:21], off glc ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: ds_read_b64 v[12:13], v19 -; GFX908-NEXT: ds_read_b64 v[14:15], v0 -; GFX908-NEXT: s_and_b64 vcc, exec, s[14:15] +; GFX908-NEXT: ds_read_b64 v[20:21], v1 +; GFX908-NEXT: ds_read_b64 v[22:23], v0 +; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1] ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_cbranch_vccnz .LBB3_4 ; GFX908-NEXT: ; %bb.6: ; %bb51 ; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2 -; GFX908-NEXT: v_cvt_f32_f16_sdwa v22, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX908-NEXT: v_cvt_f32_f16_e32 v21, v21 -; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX908-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX908-NEXT: v_add_f32_e32 v24, v17, v12 -; GFX908-NEXT: v_add_f32_e32 v25, v18, v13 -; GFX908-NEXT: v_add_f32_e32 v26, 0, v12 -; GFX908-NEXT: v_add_f32_e32 v27, 0, v13 -; GFX908-NEXT: v_add_f32_e32 v15, v22, v15 -; GFX908-NEXT: v_add_f32_e32 v14, v21, v14 -; GFX908-NEXT: v_add_f32_e32 v13, v23, v13 -; GFX908-NEXT: v_add_f32_e32 v12, v20, v12 -; GFX908-NEXT: v_add_f32_e32 v5, v5, v25 -; GFX908-NEXT: v_add_f32_e32 v4, v4, v24 -; GFX908-NEXT: v_add_f32_e32 v7, v7, v27 -; GFX908-NEXT: v_add_f32_e32 v6, v6, v26 -; GFX908-NEXT: v_add_f32_e32 v8, v8, v14 -; GFX908-NEXT: v_add_f32_e32 v9, v9, v15 -; GFX908-NEXT: v_add_f32_e32 v10, v10, v12 -; GFX908-NEXT: v_add_f32_e32 v11, v11, v13 +; GFX908-NEXT: v_cvt_f32_f16_sdwa v29, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX908-NEXT: v_cvt_f32_f16_e32 v28, v28 +; GFX908-NEXT: v_cvt_f32_f16_sdwa v30, v27 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX908-NEXT: v_cvt_f32_f16_e32 v27, v27 +; GFX908-NEXT: v_add_f32_e32 v31, v25, v20 +; GFX908-NEXT: v_add_f32_e32 v32, v26, v21 +; GFX908-NEXT: v_add_f32_e32 v33, 0, v20 +; GFX908-NEXT: v_add_f32_e32 v34, 0, v21 +; GFX908-NEXT: v_add_f32_e32 v23, v29, v23 +; GFX908-NEXT: v_add_f32_e32 v22, v28, v22 +; GFX908-NEXT: v_add_f32_e32 v21, v30, v21 +; GFX908-NEXT: v_add_f32_e32 v20, v27, v20 +; GFX908-NEXT: v_add_f32_e32 v13, v13, v32 +; GFX908-NEXT: v_add_f32_e32 v12, v12, v31 +; GFX908-NEXT: v_add_f32_e32 v15, v15, v34 +; GFX908-NEXT: v_add_f32_e32 v14, v14, v33 +; GFX908-NEXT: v_add_f32_e32 v16, v16, v22 +; GFX908-NEXT: v_add_f32_e32 v17, v17, v23 +; GFX908-NEXT: v_add_f32_e32 v18, v18, v20 +; GFX908-NEXT: v_add_f32_e32 v19, v19, v21 ; GFX908-NEXT: s_branch .LBB3_4 ; ; GFX90A-LABEL: introduced_copy_to_sgpr: ; GFX90A: ; %bb.0: ; %bb -; GFX90A-NEXT: global_load_ushort v18, v[0:1], off glc +; GFX90A-NEXT: global_load_ushort v28, v[0:1], off glc ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX90A-NEXT: s_load_dword s9, s[4:5], 0x18 -; GFX90A-NEXT: s_mov_b32 s8, 0 -; GFX90A-NEXT: s_mov_b32 s5, s8 +; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x10 +; GFX90A-NEXT: s_load_dword s7, s[4:5], 0x18 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: s_mov_b32 s6, 0 ; GFX90A-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX90A-NEXT: s_sub_i32 s4, 0, s3 -; GFX90A-NEXT: v_mov_b32_e32 v19, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GFX90A-NEXT: s_sub_i32 s12, 0, s3 +; GFX90A-NEXT: s_lshr_b32 s13, s7, 16 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s7 ; GFX90A-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s13 +; GFX90A-NEXT: s_lshl_b64 s[4:5], s[0:1], 5 +; GFX90A-NEXT: s_lshl_b64 s[10:11], s[8:9], 5 ; GFX90A-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX90A-NEXT: v_cvt_u32_f32_e32 v1, v0 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, s9 -; GFX90A-NEXT: v_readfirstlane_b32 s10, v1 -; GFX90A-NEXT: s_mul_i32 s4, s4, s10 -; GFX90A-NEXT: s_mul_hi_u32 s4, s10, s4 -; GFX90A-NEXT: s_add_i32 s10, s10, s4 -; GFX90A-NEXT: s_mul_hi_u32 s4, s2, s10 -; GFX90A-NEXT: s_mul_i32 s10, s4, s3 -; GFX90A-NEXT: s_sub_i32 s2, s2, s10 -; GFX90A-NEXT: s_add_i32 s11, s4, 1 -; GFX90A-NEXT: s_sub_i32 s10, s2, s3 -; GFX90A-NEXT: s_cmp_ge_u32 s2, s3 -; GFX90A-NEXT: s_cselect_b32 s4, s11, s4 -; GFX90A-NEXT: s_cselect_b32 s2, s10, s2 -; GFX90A-NEXT: s_add_i32 s10, s4, 1 -; GFX90A-NEXT: s_cmp_ge_u32 s2, s3 -; GFX90A-NEXT: s_cselect_b32 s4, s10, s4 -; GFX90A-NEXT: s_lshr_b32 s9, s9, 16 -; GFX90A-NEXT: s_lshl_b64 s[12:13], s[4:5], 5 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, s9 -; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5 -; GFX90A-NEXT: s_lshl_b64 s[10:11], s[6:7], 5 +; GFX90A-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX90A-NEXT: s_or_b32 s10, s10, 28 +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[8:9], s[8:9] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[10:11], s[10:11] op_sel:[0,1] +; GFX90A-NEXT: v_mul_lo_u32 v8, s12, v0 +; GFX90A-NEXT: v_mul_hi_u32 v8, v0, v8 +; GFX90A-NEXT: v_add_u32_e32 v0, v0, v8 +; GFX90A-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX90A-NEXT: v_mul_lo_u32 v8, v0, s3 +; GFX90A-NEXT: v_sub_u32_e32 v8, s2, v8 +; GFX90A-NEXT: v_add_u32_e32 v9, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc +; GFX90A-NEXT: v_subrev_u32_e32 v9, s3, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; GFX90A-NEXT: v_add_u32_e32 v9, 1, v0 +; GFX90A-NEXT: v_cmp_le_u32_e32 vcc, s3, v8 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc +; GFX90A-NEXT: v_lshlrev_b64 v[8:9], 5, v[0:1] +; GFX90A-NEXT: v_pk_mov_b32 v[10:11], 0, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_readfirstlane_b32 s5, v18 -; GFX90A-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX90A-NEXT: s_mul_i32 s1, s1, s5 -; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s5 -; GFX90A-NEXT: s_mul_i32 s0, s0, s5 -; GFX90A-NEXT: s_add_i32 s1, s9, s1 -; GFX90A-NEXT: s_lshl_b64 s[0:1], s[0:1], 5 +; GFX90A-NEXT: v_readfirstlane_b32 s2, v28 +; GFX90A-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX90A-NEXT: s_mul_i32 s1, s1, s2 +; GFX90A-NEXT: s_mul_hi_u32 s3, s0, s2 +; GFX90A-NEXT: s_mul_i32 s0, s0, s2 +; GFX90A-NEXT: s_add_i32 s1, s3, s1 +; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5 ; GFX90A-NEXT: s_branch .LBB3_2 ; GFX90A-NEXT: .LBB3_1: ; %bb12 ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX90A-NEXT: s_add_u32 s6, s6, s4 -; GFX90A-NEXT: s_addc_u32 s7, s7, 0 -; GFX90A-NEXT: s_add_u32 s10, s10, s12 -; GFX90A-NEXT: s_addc_u32 s11, s11, s13 +; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, v4, v0 +; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, v6, v8 +; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v9, vcc ; GFX90A-NEXT: .LBB3_2: ; %bb9 ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB3_5 Depth 2 ; GFX90A-NEXT: s_cbranch_scc0 .LBB3_1 ; 
GFX90A-NEXT: ; %bb.3: ; %bb14 ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 -; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; GFX90A-NEXT: s_mov_b32 s9, s8 -; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[8:9], s[8:9] op_sel:[0,1] -; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[8:9], s[8:9] op_sel:[0,1] -; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[8:9], s[8:9] op_sel:[0,1] -; GFX90A-NEXT: v_cmp_lt_i64_e64 s[14:15], s[6:7], 0 -; GFX90A-NEXT: s_mov_b64 s[16:17], s[10:11] -; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1] +; GFX90A-NEXT: global_load_dwordx2 v[12:13], v[10:11], off +; GFX90A-NEXT: s_mov_b32 s7, s6 +; GFX90A-NEXT: v_pk_mov_b32 v[16:17], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[18:19], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[20:21], s[6:7], s[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], 0, v[4:5] +; GFX90A-NEXT: v_pk_mov_b32 v[14:15], v[6:7], v[6:7] op_sel:[0,1] +; GFX90A-NEXT: v_pk_mov_b32 v[22:23], v[16:17], v[16:17] op_sel:[0,1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_readfirstlane_b32 s5, v4 -; GFX90A-NEXT: v_readfirstlane_b32 s9, v5 -; GFX90A-NEXT: s_add_u32 s5, s5, 1 -; GFX90A-NEXT: s_addc_u32 s9, s9, 0 -; GFX90A-NEXT: s_mul_hi_u32 s19, s2, s5 -; GFX90A-NEXT: s_mul_i32 s20, s3, s5 -; GFX90A-NEXT: s_mul_i32 s18, s2, s5 -; GFX90A-NEXT: s_mul_i32 s5, s2, s9 -; GFX90A-NEXT: s_add_i32 s5, s19, s5 -; GFX90A-NEXT: s_add_i32 s5, s5, s20 +; GFX90A-NEXT: v_readfirstlane_b32 s7, v12 +; GFX90A-NEXT: v_readfirstlane_b32 s8, v13 +; GFX90A-NEXT: s_add_u32 s7, s7, 1 +; GFX90A-NEXT: s_addc_u32 s9, s8, 0 +; GFX90A-NEXT: s_mul_hi_u32 s10, s4, s7 +; GFX90A-NEXT: s_mul_i32 s11, s5, s7 +; GFX90A-NEXT: s_mul_i32 s8, s4, s7 +; GFX90A-NEXT: s_mul_i32 s7, s4, s9 +; GFX90A-NEXT: s_add_i32 s7, s10, s7 +; GFX90A-NEXT: s_add_i32 s7, s7, s11 ; GFX90A-NEXT: s_branch .LBB3_5 ; GFX90A-NEXT: .LBB3_4: ; %bb58 ; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2 -; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX90A-NEXT: s_add_u32 s16, s16, s0 -; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[4:5] -; GFX90A-NEXT: s_addc_u32 s17, s17, s1 +; GFX90A-NEXT: v_add_co_u32_sdwa v12, vcc, v12, v28 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX90A-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v13, vcc +; GFX90A-NEXT: v_mov_b32_e32 v24, s3 +; GFX90A-NEXT: v_add_co_u32_e32 v14, vcc, s2, v14 +; GFX90A-NEXT: v_addc_co_u32_e32 v15, vcc, v15, v24, vcc +; GFX90A-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[12:13] ; GFX90A-NEXT: s_cbranch_vccz .LBB3_1 ; GFX90A-NEXT: .LBB3_5: ; %bb16 ; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1 ; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX90A-NEXT: s_add_u32 s20, s16, s18 -; GFX90A-NEXT: s_addc_u32 s21, s17, s5 -; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc +; GFX90A-NEXT: v_mov_b32_e32 v25, s7 +; GFX90A-NEXT: v_add_co_u32_e32 v24, vcc, s8, v14 +; GFX90A-NEXT: v_addc_co_u32_e32 v25, vcc, v15, v25, vcc +; GFX90A-NEXT: global_load_dword v30, v[24:25], off offset:-12 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc +; GFX90A-NEXT: global_load_dword v29, v[24:25], off offset:-8 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] offset:-4 glc +; GFX90A-NEXT: global_load_dword v26, v[24:25], off offset:-4 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_load_dword v14, v19, 
s[20:21] glc +; GFX90A-NEXT: global_load_dword v26, v[24:25], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: ds_read_b64 v[14:15], v19 -; GFX90A-NEXT: ds_read_b64 v[16:17], v0 -; GFX90A-NEXT: s_and_b64 vcc, exec, s[14:15] -; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21 +; GFX90A-NEXT: ; kill: killed $vgpr24 killed $vgpr25 +; GFX90A-NEXT: ds_read_b64 v[24:25], v1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ds_read_b64 v[26:27], v0 +; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_cbranch_vccnz .LBB3_4 ; GFX90A-NEXT: ; %bb.6: ; %bb51 ; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2 -; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21 -; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX90A-NEXT: v_cvt_f32_f16_e32 v20, v20 -; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[0:1], v[14:15] -; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[14:15], 0 op_sel_hi:[1,0] -; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17] -; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15] -; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[24:25] -; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27] -; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17] -; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15] +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v31, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v30, v30 +; GFX90A-NEXT: v_cvt_f32_f16_sdwa v33, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX90A-NEXT: v_cvt_f32_f16_e32 v32, v29 +; GFX90A-NEXT: v_pk_add_f32 v[34:35], v[2:3], v[24:25] +; GFX90A-NEXT: v_pk_add_f32 v[36:37], v[24:25], 0 op_sel_hi:[1,0] +; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[30:31], v[26:27] +; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[32:33], v[24:25] +; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[16:17], v[34:35] +; GFX90A-NEXT: v_pk_add_f32 v[18:19], v[18:19], v[36:37] +; GFX90A-NEXT: v_pk_add_f32 v[20:21], v[20:21], v[26:27] +; GFX90A-NEXT: v_pk_add_f32 v[22:23], v[22:23], v[24:25] ; GFX90A-NEXT: s_branch .LBB3_4 bb: %i = load volatile i16, i16 addrspace(4)* undef, align 2 @@ -864,133 +872,133 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] s[0:15] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_mov_b32_e32 v34, s15 +; GFX908-NEXT: v_mov_b32_e32 v39, s15 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a31, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s14 +; GFX908-NEXT: v_accvgpr_write_b32 a31, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s14 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a30, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s13 +; GFX908-NEXT: v_accvgpr_write_b32 a30, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s13 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a29, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s12 +; GFX908-NEXT: v_accvgpr_write_b32 a29, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s12 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a28, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s11 +; GFX908-NEXT: v_accvgpr_write_b32 a28, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s11 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a27, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s10 +; GFX908-NEXT: v_accvgpr_write_b32 a27, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s10 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a26, v34 -; GFX908-NEXT: 
v_mov_b32_e32 v34, s9 +; GFX908-NEXT: v_accvgpr_write_b32 a26, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s9 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a25, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s8 +; GFX908-NEXT: v_accvgpr_write_b32 a25, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s8 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a24, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s7 +; GFX908-NEXT: v_accvgpr_write_b32 a24, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a23, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s6 +; GFX908-NEXT: v_accvgpr_write_b32 a23, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s6 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a22, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s5 +; GFX908-NEXT: v_accvgpr_write_b32 a22, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s5 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a21, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s4 +; GFX908-NEXT: v_accvgpr_write_b32 a21, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s4 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a20, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a20, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s3 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a19, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s2 +; GFX908-NEXT: v_accvgpr_write_b32 a19, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a18, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s1 +; GFX908-NEXT: v_accvgpr_write_b32 a18, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a17, v34 -; GFX908-NEXT: v_mov_b32_e32 v34, s0 +; GFX908-NEXT: v_accvgpr_write_b32 a17, v39 +; GFX908-NEXT: v_mov_b32_e32 v39, s0 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a16, v34 +; GFX908-NEXT: v_accvgpr_write_b32 a16, v39 ; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX908-NEXT: s_nop 7 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v39, a0 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v38, a11 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v37, a12 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v36, a13 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_read_b32 v35, a14 ; Reload Reuse -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse -; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: s_nop 0 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a3 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse +; GFX908-NEXT: 
buffer_store_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a4 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a5 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a6 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a7 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a8 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a9 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: v_accvgpr_read_b32 v39, a10 ; Reload Reuse ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: v_accvgpr_read_b32 v34, a15 ; Reload Reuse +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a1 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a32, v34 -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a32, v39 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a0, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a1, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte 
Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a2, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a4, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a5, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a6, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a7, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a8, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse -; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: v_accvgpr_write_b32 a9, v39 ; Reload Reuse +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse +; GFX908-NEXT: v_accvgpr_write_b32 a10, v39 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a11, v38 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a12, v37 ; Reload Reuse ; GFX908-NEXT: v_accvgpr_write_b32 a13, v36 ; Reload Reuse diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir new file mode 100644 index 0000000000000..30bad29c3e504 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=prologepilog,postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s + +--- +name: standard +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX908-LABEL: name: standard + ; GFX908: liveins: $vgpr0, $vgpr1 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; GFX908-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 + $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 +... 
+ +--- +name: src_is_spill +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0.entry: + ; GFX908-LABEL: name: src_is_spill + ; GFX908: liveins: $vgpr0, $vgpr1 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; GFX908-NEXT: $agpr0_agpr1 = IMPLICIT_DEF + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr2_agpr3, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0 + $agpr0_agpr1 = IMPLICIT_DEF + SI_SPILL_AV64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) + $agpr2_agpr3 = COPY $agpr0_agpr1, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: overlapping_agpr +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1_agpr2_agpr3 + ; GFX908-LABEL: name: overlapping_agpr + ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr1_agpr2_agpr3_agpr4 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr1_agpr2_agpr3_agpr4 + $agpr1_agpr2_agpr3_agpr4 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec + S_ENDPGM 0, implicit $agpr1_agpr2_agpr3_agpr4 +... diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll index d79e70486d155..de4792af7f4d4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -45,14 +45,14 @@ define amdgpu_ps void @test_complex_reg_offset(float addrspace(1)* %out) { } ; GCN-LABEL: name: test_sgpr_plus_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr2 ; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, define amdgpu_ps void @test_sgpr_plus_imm_offset(i8 addrspace(4)* inreg %base, i32 inreg %offset, @@ -67,14 +67,14 @@ define amdgpu_ps void @test_sgpr_plus_imm_offset(i8 addrspace(4)* inreg %base, i } ; GCN-LABEL: name: test_sgpr_plus_imm_offset_x2 -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY 
$sgpr1 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr2 ; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; SDAG: S_LOAD_DWORDX2_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORDX2_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base, i32 inreg %offset, @@ -89,18 +89,18 @@ define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base } ; GCN-LABEL: name: test_buffer_load_sgpr_plus_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2 -; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3 -; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr4 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = PRED_COPY $sgpr2 +; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = PRED_COPY $sgpr3 +; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = PRED_COPY $sgpr4 ; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 77, -; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2 -; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3 -; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr4 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = PRED_COPY $sgpr2 +; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = PRED_COPY $sgpr3 +; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = PRED_COPY $sgpr4 ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77, define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, i32 addrspace(1)* inreg %out) { @@ -111,19 +111,19 @@ define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %ba } ; GCN-LABEL: name: test_buffer_load_sgpr_or_imm_offset -; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0 -; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1 -; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2 -; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3 -; SDAG-DAG: %[[INDEX:.*]]:sgpr_32 = COPY $sgpr4 +; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = PRED_COPY $sgpr0 +; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = PRED_COPY $sgpr1 +; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = PRED_COPY $sgpr2 +; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = PRED_COPY $sgpr3 +; SDAG-DAG: %[[INDEX:.*]]:sgpr_32 = PRED_COPY $sgpr4 ; SDAG-DAG: %[[SHIFT:.*]]:sreg_32 = S_LSHL_B32 %[[INDEX]], ; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[SHIFT]], 5, -; 
GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0 -; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1 -; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2 -; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3 -; GISEL-DAG: %[[INDEX:.*]]:sreg_32 = COPY $sgpr4 +; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = PRED_COPY $sgpr0 +; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = PRED_COPY $sgpr1 +; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = PRED_COPY $sgpr2 +; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = PRED_COPY $sgpr3 +; GISEL-DAG: %[[INDEX:.*]]:sreg_32 = PRED_COPY $sgpr4 ; GISEL-DAG: %[[SHIFT:.*]]:sreg_32 = S_LSHL_B32 %[[INDEX]], ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[SHIFT]], 5, diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 49f016407c108..bce6ba45c3121 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -54,49 +54,44 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: s_mul_i32 s0, s0, s3 -; GFX6-NEXT: s_sub_i32 s0, s2, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s3 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: udiv_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 ; GFX9-NEXT: s_sub_i32 s4, 0, s3 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s5, v0 -; GFX9-NEXT: s_mul_i32 s4, s4, s5 -; GFX9-NEXT: s_mul_hi_u32 s4, s5, s4 -; GFX9-NEXT: s_add_i32 s5, s5, s4 -; GFX9-NEXT: s_mul_hi_u32 s4, s2, s5 -; GFX9-NEXT: s_mul_i32 s5, s4, s3 -; GFX9-NEXT: s_sub_i32 s2, s2, s5 -; GFX9-NEXT: s_add_i32 s6, s4, 1 -; GFX9-NEXT: s_sub_i32 s5, s2, s3 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s4, s6, s4 -; GFX9-NEXT: s_cselect_b32 s2, s5, s2 -; GFX9-NEXT: s_add_i32 s5, s4, 1 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s2, s5, s4 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v1, s4, v0 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s3 +; 
GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s2, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm %r = udiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out @@ -150,18 +145,16 @@ define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0 ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: s_mul_i32 s0, s0, s3 -; GFX6-NEXT: s_sub_i32 s0, s2, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s3 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s3 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s3 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s3, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s3, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; @@ -255,68 +248,63 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_xor_b32 s2, s2, s9 ; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0 ; GFX6-NEXT: s_mov_b32 s4, s0 -; GFX6-NEXT: s_xor_b32 s0, s9, s8 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v0 -; GFX6-NEXT: s_mul_i32 s1, s1, s3 -; GFX6-NEXT: s_sub_i32 s1, s2, s1 -; GFX6-NEXT: s_sub_i32 s2, s1, s3 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s1, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s1, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; 
GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s4, s3, 31 ; GFX9-NEXT: s_add_i32 s3, s3, s4 ; GFX9-NEXT: s_xor_b32 s3, s3, s4 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX9-NEXT: s_ashr_i32 s5, s2, 31 -; GFX9-NEXT: s_add_i32 s2, s2, s5 -; GFX9-NEXT: s_xor_b32 s4, s5, s4 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s2, s2, s5 ; GFX9-NEXT: s_sub_i32 s5, 0, s3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s6, v0 -; GFX9-NEXT: s_mul_i32 s5, s5, s6 -; GFX9-NEXT: s_mul_hi_u32 s5, s6, s5 -; GFX9-NEXT: s_add_i32 s6, s6, s5 -; GFX9-NEXT: s_mul_hi_u32 s5, s2, s6 -; GFX9-NEXT: s_mul_i32 s6, s5, s3 -; GFX9-NEXT: s_sub_i32 s2, s2, s6 -; GFX9-NEXT: s_add_i32 s7, s5, 1 -; GFX9-NEXT: s_sub_i32 s6, s2, s3 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s5, s7, s5 -; GFX9-NEXT: s_cselect_b32 s2, s6, s2 -; GFX9-NEXT: s_add_i32 s6, s5, 1 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s2, s6, s5 -; GFX9-NEXT: s_xor_b32 s2, s2, s4 -; GFX9-NEXT: s_sub_i32 s2, s2, s4 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v1, s5, v0 +; GFX9-NEXT: s_ashr_i32 s5, s2, 31 +; GFX9-NEXT: s_add_i32 s2, s2, s5 +; GFX9-NEXT: s_xor_b32 s2, s2, s5 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: s_xor_b32 s4, s5, s4 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s2, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm %r = sdiv i32 %x, %y store i32 %r, i32 addrspace(1)* %out @@ -384,18 +372,16 @@ define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX6-NEXT: v_readfirstlane_b32 s7, v0 -; GFX6-NEXT: s_mul_i32 s7, s7, s4 -; GFX6-NEXT: s_sub_i32 s6, s6, s7 -; GFX6-NEXT: s_sub_i32 s7, s6, s4 -; GFX6-NEXT: s_cmp_ge_u32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_sub_i32 s7, s6, s4 -; GFX6-NEXT: s_cmp_ge_u32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s4, s7, s6 -; GFX6-NEXT: s_xor_b32 s4, s4, s5 -; GFX6-NEXT: s_sub_i32 s4, s4, s5 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s5, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s5, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -619,15 +605,15 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: s_xor_b32 s4, s4, s5 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; 
GFX6-NEXT: s_or_b32 s4, s4, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s4 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -693,24 +679,24 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_ashr_i32 s5, s4, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s5 -; GFX6-NEXT: s_sext_i32_i16 s2, s4 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s2 -; GFX6-NEXT: s_xor_b32 s2, s2, s5 +; GFX6-NEXT: s_ashr_i32 s2, s4, 16 +; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GFX6-NEXT: s_sext_i32_i16 s3, s4 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s3 +; GFX6-NEXT: s_xor_b32 s3, s3, s2 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GFX6-NEXT: s_ashr_i32 s2, s2, 30 -; GFX6-NEXT: s_or_b32 s6, s2, 1 +; GFX6-NEXT: s_ashr_i32 s3, s3, 30 +; GFX6-NEXT: s_or_b32 s3, s3, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[2:3], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GFX6-NEXT: s_cselect_b32 s2, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, s5 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -923,15 +909,15 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: s_xor_b32 s4, s4, s5 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: s_or_b32 s4, s4, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s4 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -999,22 +985,22 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s2, s4, 0x80008 ; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s2 -; GFX6-NEXT: s_sext_i32_i8 s3, s4 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s3 -; GFX6-NEXT: s_xor_b32 s2, s3, s2 +; GFX6-NEXT: s_sext_i32_i8 s5, s4 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s5 +; GFX6-NEXT: s_xor_b32 s2, s5, s2 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s2, s2, 30 -; GFX6-NEXT: s_lshr_b32 s5, s4, 8 -; GFX6-NEXT: s_or_b32 s6, s2, 1 +; GFX6-NEXT: s_or_b32 s2, 
s2, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s2 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[2:3], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GFX6-NEXT: s_cselect_b32 s2, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, s5 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: s_lshr_b32 s3, s4, 8 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s3 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 @@ -1188,100 +1174,88 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; ; GFX6-LABEL: udiv_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0xd -; GFX6-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s19, 0xf000 -; GFX6-NEXT: s_mov_b32 s18, -1 +; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s15, 0xf000 +; GFX6-NEXT: s_mov_b32 s14, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s12 -; GFX6-NEXT: s_sub_i32 s2, 0, s12 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s13 -; GFX6-NEXT: v_cvt_f32_u32_e32 v4, s14 +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX6-NEXT: s_sub_i32 s2, 0, s8 +; GFX6-NEXT: v_cvt_f32_u32_e32 v4, s10 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_cvt_f32_u32_e32 v6, s15 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_u32_e32 v6, s11 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s12 -; GFX6-NEXT: s_sub_i32 s2, s8, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s12 -; GFX6-NEXT: s_cmp_ge_u32 s2, s12 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s2, s12 -; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX6-NEXT: s_sub_i32 s4, 0, s13 -; GFX6-NEXT: v_mul_lo_u32 v3, s4, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX6-NEXT: s_sub_i32 s2, 0, s9 +; GFX6-NEXT: v_mul_lo_u32 v3, s2, v1 +; GFX6-NEXT: s_sub_i32 s2, 0, s10 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[2:3] -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s9, v1 -; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v4 -; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX6-NEXT: v_readfirstlane_b32 s4, v1 -; GFX6-NEXT: s_mul_i32 s4, s4, s13 -; GFX6-NEXT: s_sub_i32 s4, s9, s4 -; GFX6-NEXT: s_sub_i32 s5, s4, s13 -; GFX6-NEXT: s_cmp_ge_u32 s4, s13 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s4, s13 -; GFX6-NEXT: 
s_cselect_b64 s[4:5], -1, 0 -; GFX6-NEXT: s_sub_i32 s6, 0, s14 -; GFX6-NEXT: v_mul_lo_u32 v5, s6, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, s8 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v5, v1, s9 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s5, v5 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GFX6-NEXT: v_mul_hi_u32 v3, s10, v3 -; GFX6-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v6 -; GFX6-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GFX6-NEXT: v_readfirstlane_b32 s6, v3 -; GFX6-NEXT: s_mul_i32 s6, s6, s14 -; GFX6-NEXT: s_sub_i32 s6, s10, s6 -; GFX6-NEXT: s_sub_i32 s7, s6, s14 -; GFX6-NEXT: s_cmp_ge_u32 s6, s14 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v3 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s6, s14 -; GFX6-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GFX6-NEXT: s_sub_i32 s8, 0, s15 -; GFX6-NEXT: v_mul_lo_u32 v7, s8, v5 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v3 -; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX6-NEXT: v_cndmask_b32_e64 v2, v3, v6, s[6:7] -; GFX6-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GFX6-NEXT: v_mul_hi_u32 v5, s11, v5 -; GFX6-NEXT: v_readfirstlane_b32 s0, v5 -; GFX6-NEXT: s_mul_i32 s0, s0, s15 -; GFX6-NEXT: s_sub_i32 s0, s11, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s15 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX6-NEXT: s_cmp_ge_u32 s0, s15 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v3 -; GFX6-NEXT: s_cmp_ge_u32 s0, s15 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] +; GFX6-NEXT: v_mul_lo_u32 v4, s2, v2 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s9, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] +; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4 +; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v1 +; GFX6-NEXT: s_sub_i32 s0, 0, s11 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v6 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX6-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX6-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GFX6-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v2, s10 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s6, v3 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v3 +; GFX6-NEXT: v_mul_hi_u32 v5, v4, v5 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 
v6, vcc, s10, v3 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GFX6-NEXT: v_mul_hi_u32 v4, s7, v4 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v2 +; GFX6-NEXT: v_mul_lo_u32 v6, v4, s11 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s7, v6 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: udiv_v4i32: @@ -1293,87 +1267,79 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s9 ; GFX9-NEXT: s_sub_i32 s2, 0, s8 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s10 +; GFX9-NEXT: s_sub_i32 s3, 0, s9 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX9-NEXT: v_cvt_f32_u32_e32 v5, s10 +; GFX9-NEXT: v_cvt_f32_u32_e32 v6, s11 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_readfirstlane_b32 s3, v0 -; GFX9-NEXT: s_mul_i32 s2, s2, s3 -; GFX9-NEXT: s_mul_hi_u32 s2, s3, s2 -; GFX9-NEXT: s_add_i32 s3, s3, s2 -; GFX9-NEXT: s_mul_hi_u32 s2, s4, s3 -; GFX9-NEXT: s_mul_i32 s3, s2, s8 -; GFX9-NEXT: s_sub_i32 s3, s4, s3 -; GFX9-NEXT: s_add_i32 s13, s2, 1 -; GFX9-NEXT: s_sub_i32 s4, s3, s8 -; GFX9-NEXT: s_cmp_ge_u32 s3, s8 -; GFX9-NEXT: s_cselect_b32 s2, s13, s2 -; GFX9-NEXT: s_cselect_b32 s3, s4, s3 -; GFX9-NEXT: s_add_i32 s4, s2, 1 -; GFX9-NEXT: s_cmp_ge_u32 s3, s8 -; GFX9-NEXT: v_readfirstlane_b32 s12, v1 -; GFX9-NEXT: s_cselect_b32 s2, s4, s2 -; GFX9-NEXT: s_sub_i32 s3, 0, s9 -; GFX9-NEXT: s_mul_i32 s3, s3, s12 -; GFX9-NEXT: s_mul_hi_u32 s3, s12, s3 -; GFX9-NEXT: s_add_i32 s12, s12, s3 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v2 -; GFX9-NEXT: s_mul_hi_u32 s3, s5, s12 -; GFX9-NEXT: s_mul_i32 s4, s3, s9 -; GFX9-NEXT: s_sub_i32 s4, s5, s4 -; GFX9-NEXT: s_add_i32 s8, s3, 1 -; GFX9-NEXT: s_sub_i32 s5, s4, s9 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: s_cmp_ge_u32 s4, s9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_cselect_b32 s3, s8, s3 -; GFX9-NEXT: s_cselect_b32 s4, s5, s4 -; GFX9-NEXT: s_add_i32 s5, s3, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s9 -; GFX9-NEXT: s_cselect_b32 s3, s5, s3 -; GFX9-NEXT: v_readfirstlane_b32 s5, v0 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s11 -; GFX9-NEXT: s_sub_i32 s4, 0, s10 -; GFX9-NEXT: s_mul_i32 s4, s4, s5 -; GFX9-NEXT: s_mul_hi_u32 s4, s5, s4 -; GFX9-NEXT: s_add_i32 s5, s5, s4 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_mul_hi_u32 s4, s6, s5 -; GFX9-NEXT: s_mul_i32 s5, s4, s10 -; GFX9-NEXT: s_sub_i32 s5, s6, s5 -; GFX9-NEXT: s_add_i32 s6, s4, 1 -; GFX9-NEXT: s_sub_i32 s8, s5, s10 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: s_cmp_ge_u32 s5, s10 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_cselect_b32 s4, s6, s4 -; GFX9-NEXT: s_cselect_b32 s5, s8, s5 -; GFX9-NEXT: s_add_i32 s6, s4, 1 -; GFX9-NEXT: s_cmp_ge_u32 s5, s10 -; GFX9-NEXT: s_cselect_b32 s4, s6, s4 -; GFX9-NEXT: s_sub_i32 s5, 0, s11 -; GFX9-NEXT: v_readfirstlane_b32 s6, v0 
-; GFX9-NEXT: s_mul_i32 s5, s5, s6 -; GFX9-NEXT: s_mul_hi_u32 s5, s6, s5 -; GFX9-NEXT: s_add_i32 s6, s6, s5 -; GFX9-NEXT: s_mul_hi_u32 s5, s7, s6 -; GFX9-NEXT: s_mul_i32 s6, s5, s11 -; GFX9-NEXT: s_sub_i32 s6, s7, s6 -; GFX9-NEXT: s_add_i32 s7, s5, 1 -; GFX9-NEXT: s_sub_i32 s8, s6, s11 -; GFX9-NEXT: s_cmp_ge_u32 s6, s11 -; GFX9-NEXT: s_cselect_b32 s5, s7, s5 -; GFX9-NEXT: s_cselect_b32 s6, s8, s6 -; GFX9-NEXT: s_add_i32 s7, s5, 1 -; GFX9-NEXT: s_cmp_ge_u32 s6, s11 -; GFX9-NEXT: s_cselect_b32 s5, s7, s5 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GFX9-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX9-NEXT: s_sub_i32 s2, 0, s10 +; GFX9-NEXT: v_mul_lo_u32 v3, s3, v1 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v5 +; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s8 +; GFX9-NEXT: v_add_u32_e32 v7, 1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, s8, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; GFX9-NEXT: v_mul_lo_u32 v3, s2, v2 +; GFX9-NEXT: s_sub_i32 s2, 0, s11 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, s9 +; GFX9-NEXT: v_add_u32_e32 v7, 1, v0 +; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX9-NEXT: v_add_u32_e32 v8, 1, v1 +; GFX9-NEXT: v_sub_u32_e32 v5, s5, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_mul_lo_u32 v3, s2, v6 +; GFX9-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; GFX9-NEXT: v_mul_hi_u32 v3, v6, v3 +; GFX9-NEXT: v_mul_lo_u32 v8, v2, s10 +; GFX9-NEXT: v_subrev_u32_e32 v7, s9, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v3, v6, v3 +; GFX9-NEXT: v_mul_hi_u32 v3, s7, v3 +; GFX9-NEXT: v_add_u32_e32 v7, 1, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 +; GFX9-NEXT: v_sub_u32_e32 v5, s6, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v5 +; GFX9-NEXT: v_subrev_u32_e32 v6, s10, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v3, s11 +; GFX9-NEXT: v_add_u32_e32 v7, 1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v7, 1, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s10, v5 +; GFX9-NEXT: v_sub_u32_e32 v5, s7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GFX9-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX9-NEXT: v_subrev_u32_e32 v6, s11, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX9-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s11, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm %r = udiv <4 x i32> %x, %y @@ -1510,85 +1476,77 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx8 s[4:11], 
s[0:1], 0xd ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX6-NEXT: s_sub_i32 s2, 0, s8 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s9 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX6-NEXT: s_sub_i32 s12, 0, s8 +; GFX6-NEXT: s_sub_i32 s13, 0, s9 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_u32_e32 v3, s10 +; GFX6-NEXT: v_cvt_f32_u32_e32 v5, s11 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s10 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s8 -; GFX6-NEXT: s_sub_i32 s2, s4, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s8 -; GFX6-NEXT: s_cmp_ge_u32 s2, s8 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s8 -; GFX6-NEXT: s_cmp_ge_u32 s2, s8 -; GFX6-NEXT: s_cselect_b32 s4, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s9 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s11 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s9 -; GFX6-NEXT: s_sub_i32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s9 -; GFX6-NEXT: s_cmp_ge_u32 s2, s9 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s9 -; GFX6-NEXT: s_cmp_ge_u32 s2, s9 -; GFX6-NEXT: s_cselect_b32 s5, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s10 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s10 -; GFX6-NEXT: s_sub_i32 s2, s6, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s10 -; GFX6-NEXT: s_cmp_ge_u32 s2, s10 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s10 -; GFX6-NEXT: s_cmp_ge_u32 s2, s10 -; GFX6-NEXT: s_cselect_b32 s6, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s11 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v2, s7, v0 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: v_mov_b32_e32 v1, s5 -; GFX6-NEXT: v_readfirstlane_b32 s4, v2 -; GFX6-NEXT: s_mul_i32 s4, s4, s11 -; GFX6-NEXT: s_sub_i32 s4, s7, s4 -; GFX6-NEXT: s_sub_i32 s5, s4, s11 -; GFX6-NEXT: s_cmp_ge_u32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_sub_i32 s5, s4, s11 -; GFX6-NEXT: s_cmp_ge_u32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: v_mov_b32_e32 v2, s6 -; GFX6-NEXT: v_mov_b32_e32 v3, s4 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, s12, v0 +; GFX6-NEXT: v_mul_lo_u32 v4, s13, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; 
GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s8 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s9 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: s_sub_i32 s4, 0, s10 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_mul_lo_u32 v3, s4, v2 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s9, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; GFX6-NEXT: s_sub_i32 s4, 0, s11 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v4 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s9, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s4, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_mul_lo_u32 v2, v2, s10 +; GFX6-NEXT: v_mul_hi_u32 v4, v3, v5 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s6, v2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: v_mul_hi_u32 v3, s7, v3 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s10, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, s11 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s10, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s7, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s11, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s11, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1852,136 +1810,124 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; ; GFX6-LABEL: sdiv_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0xd -; GFX6-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s19, 0xf000 -; GFX6-NEXT: s_mov_b32 s18, -1 +; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s15, 0xf000 +; GFX6-NEXT: s_mov_b32 s14, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_ashr_i32 s2, s12, 31 -; GFX6-NEXT: s_add_i32 s3, s12, s2 +; GFX6-NEXT: s_ashr_i32 s2, s8, 31 +; GFX6-NEXT: s_add_i32 s3, s8, s2 ; GFX6-NEXT: s_xor_b32 s3, s3, s2 ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX6-NEXT: s_sub_i32 s4, 0, s3 +; GFX6-NEXT: s_ashr_i32 s8, s9, 31 +; GFX6-NEXT: s_add_i32 s0, s9, s8 +; GFX6-NEXT: s_xor_b32 s9, s0, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX6-NEXT: s_sub_i32 s1, 0, s3 +; GFX6-NEXT: s_ashr_i32 s0, s4, 31 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0 -; GFX6-NEXT: s_ashr_i32 s4, s8, 31 -; GFX6-NEXT: s_add_i32 s5, s8, s4 -; 
GFX6-NEXT: s_xor_b32 s5, s5, s4 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_xor_b32 s8, s4, s2 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s3 -; GFX6-NEXT: s_sub_i32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s4, s2, s3 -; GFX6-NEXT: s_cmp_ge_u32 s2, s3 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cselect_b32 s2, s4, s2 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s2, s3 -; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX6-NEXT: s_ashr_i32 s4, s13, 31 -; GFX6-NEXT: s_add_i32 s5, s13, s4 -; GFX6-NEXT: s_xor_b32 s5, s5, s4 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s5 -; GFX6-NEXT: s_sub_i32 s6, 0, s5 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[2:3] -; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX6-NEXT: v_mul_lo_u32 v3, s6, v2 -; GFX6-NEXT: s_ashr_i32 s6, s9, 31 -; GFX6-NEXT: s_add_i32 s7, s9, s6 -; GFX6-NEXT: s_xor_b32 s7, s7, s6 -; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 -; GFX6-NEXT: s_xor_b32 s9, s6, s4 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s7, v2 -; GFX6-NEXT: v_readfirstlane_b32 s4, v2 -; GFX6-NEXT: s_mul_i32 s4, s4, s5 -; GFX6-NEXT: s_sub_i32 s4, s7, s4 -; GFX6-NEXT: s_sub_i32 s6, s4, s5 -; GFX6-NEXT: s_cmp_ge_u32 s4, s5 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v2 -; GFX6-NEXT: s_cselect_b32 s4, s6, s4 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s4, s5 -; GFX6-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GFX6-NEXT: s_ashr_i32 s6, s14, 31 -; GFX6-NEXT: s_add_i32 s7, s14, s6 -; GFX6-NEXT: s_xor_b32 s7, s7, s6 -; GFX6-NEXT: v_cvt_f32_u32_e32 v4, s7 -; GFX6-NEXT: s_sub_i32 s12, 0, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v2 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] -; GFX6-NEXT: v_xor_b32_e32 v2, s9, v2 -; GFX6-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 -; GFX6-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX6-NEXT: v_mul_lo_u32 v5, s12, v4 -; GFX6-NEXT: s_ashr_i32 s12, s10, 31 -; GFX6-NEXT: s_add_i32 s10, s10, s12 -; GFX6-NEXT: s_xor_b32 s10, s10, s12 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: s_xor_b32 s2, s0, s2 +; GFX6-NEXT: v_mul_lo_u32 v2, s1, v0 +; GFX6-NEXT: s_add_i32 s1, s4, s0 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX6-NEXT: s_xor_b32 s1, s1, s0 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX6-NEXT: s_sub_i32 s0, 0, s9 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX6-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX6-NEXT: v_mul_lo_u32 v3, v0, s3 +; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s3, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 +; GFX6-NEXT: s_ashr_i32 s0, s5, 31 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: s_add_i32 s1, s5, s0 +; GFX6-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX6-NEXT: s_ashr_i32 s3, s10, 31 +; GFX6-NEXT: s_xor_b32 s1, s1, s0 +; 
GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 +; GFX6-NEXT: s_xor_b32 s2, s0, s8 +; GFX6-NEXT: s_add_i32 s0, s10, s3 +; GFX6-NEXT: s_xor_b32 s4, s0, s3 +; GFX6-NEXT: v_cvt_f32_u32_e32 v3, s4 +; GFX6-NEXT: v_mul_hi_u32 v1, s1, v1 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, v1, s9 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v1 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s1, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s9, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX6-NEXT: s_sub_i32 s0, 0, s4 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v3 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_mul_hi_u32 v2, v3, v5 +; GFX6-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: s_ashr_i32 s2, s11, 31 +; GFX6-NEXT: s_ashr_i32 s0, s6, 31 +; GFX6-NEXT: s_add_i32 s5, s11, s2 +; GFX6-NEXT: s_add_i32 s1, s6, s0 +; GFX6-NEXT: s_xor_b32 s5, s5, s2 +; GFX6-NEXT: s_xor_b32 s1, s1, s0 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_cvt_f32_u32_e32 v4, s5 +; GFX6-NEXT: v_mul_hi_u32 v2, s1, v2 +; GFX6-NEXT: s_xor_b32 s3, s0, s3 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v2, s4 +; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v2 +; GFX6-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] +; GFX6-NEXT: s_sub_i32 s0, 0, s5 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v4 +; GFX6-NEXT: s_ashr_i32 s0, s7, 31 +; GFX6-NEXT: s_add_i32 s1, s7, s0 +; GFX6-NEXT: s_xor_b32 s1, s1, s0 ; GFX6-NEXT: v_mul_hi_u32 v5, v4, v5 -; GFX6-NEXT: s_xor_b32 s12, s12, s6 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GFX6-NEXT: v_mul_hi_u32 v4, s10, v4 -; GFX6-NEXT: v_readfirstlane_b32 s6, v4 -; GFX6-NEXT: s_mul_i32 s6, s6, s7 -; GFX6-NEXT: s_sub_i32 s6, s10, s6 -; GFX6-NEXT: s_sub_i32 s10, s6, s7 -; GFX6-NEXT: s_cmp_ge_u32 s6, s7 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GFX6-NEXT: s_xor_b32 s2, s0, s2 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GFX6-NEXT: v_mul_hi_u32 v4, s1, v4 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX6-NEXT: v_xor_b32_e32 v2, s3, v2 +; GFX6-NEXT: v_mul_lo_u32 v3, v4, s5 ; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v4 -; GFX6-NEXT: s_cselect_b32 s6, s10, s6 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s6, s7 -; GFX6-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GFX6-NEXT: s_ashr_i32 s10, s15, 31 -; GFX6-NEXT: s_add_i32 s13, s15, s10 -; GFX6-NEXT: s_xor_b32 s13, s13, s10 -; GFX6-NEXT: v_cvt_f32_u32_e32 v6, s13 -; GFX6-NEXT: s_sub_i32 s0, 0, s13 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s5, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] ; GFX6-NEXT: v_add_i32_e32 v5, vcc, 1, v4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[6:7] -; GFX6-NEXT: v_xor_b32_e32 v4, 
s12, v4 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v1 -; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s9, v2 -; GFX6-NEXT: v_mul_lo_u32 v2, s0, v3 -; GFX6-NEXT: s_ashr_i32 s0, s11, 31 -; GFX6-NEXT: s_add_i32 s1, s11, s0 -; GFX6-NEXT: s_xor_b32 s1, s1, s0 -; GFX6-NEXT: v_mul_hi_u32 v2, v3, v2 -; GFX6-NEXT: s_xor_b32 s0, s0, s10 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s12, v4 -; GFX6-NEXT: v_readfirstlane_b32 s2, v3 -; GFX6-NEXT: s_mul_i32 s2, s2, s13 -; GFX6-NEXT: s_sub_i32 s1, s1, s2 -; GFX6-NEXT: s_sub_i32 s2, s1, s13 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v3 -; GFX6-NEXT: s_cmp_ge_u32 s1, s13 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX6-NEXT: s_cselect_b32 s1, s2, s1 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v3 -; GFX6-NEXT: s_cmp_ge_u32 s1, s13 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX6-NEXT: v_xor_b32_e32 v3, s0, v3 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s0, v3 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX6-NEXT: v_xor_b32_e32 v3, s2, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s2, v3 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_v4i32: @@ -1994,122 +1940,114 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX9-NEXT: s_add_i32 s3, s8, s2 ; GFX9-NEXT: s_xor_b32 s3, s3, s2 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX9-NEXT: s_ashr_i32 s8, s4, 31 -; GFX9-NEXT: s_add_i32 s4, s4, s8 -; GFX9-NEXT: s_xor_b32 s2, s8, s2 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s4, s4, s8 -; GFX9-NEXT: s_sub_i32 s8, 0, s3 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s12, v0 -; GFX9-NEXT: s_mul_i32 s8, s8, s12 -; GFX9-NEXT: s_mul_hi_u32 s8, s12, s8 -; GFX9-NEXT: s_add_i32 s12, s12, s8 -; GFX9-NEXT: s_mul_hi_u32 s8, s4, s12 -; GFX9-NEXT: s_mul_i32 s12, s8, s3 -; GFX9-NEXT: s_sub_i32 s4, s4, s12 -; GFX9-NEXT: s_add_i32 s13, s8, 1 -; GFX9-NEXT: s_sub_i32 s12, s4, s3 -; GFX9-NEXT: s_cmp_ge_u32 s4, s3 -; GFX9-NEXT: s_cselect_b32 s8, s13, s8 -; GFX9-NEXT: s_cselect_b32 s4, s12, s4 -; GFX9-NEXT: s_add_i32 s12, s8, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s3 -; GFX9-NEXT: s_cselect_b32 s3, s12, s8 -; GFX9-NEXT: s_ashr_i32 s4, s9, 31 -; GFX9-NEXT: s_add_i32 s8, s9, s4 -; GFX9-NEXT: s_xor_b32 s8, s8, s4 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX9-NEXT: s_ashr_i32 s9, s5, 31 -; GFX9-NEXT: s_xor_b32 s3, s3, s2 -; GFX9-NEXT: s_add_i32 s5, s5, s9 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s4, s9, s4 -; GFX9-NEXT: s_sub_i32 s2, s3, s2 -; GFX9-NEXT: s_xor_b32 s3, s5, s9 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s5, 0, s8 -; GFX9-NEXT: v_readfirstlane_b32 s9, v0 -; GFX9-NEXT: s_mul_i32 s5, s5, s9 -; GFX9-NEXT: s_mul_hi_u32 s5, s9, s5 -; GFX9-NEXT: s_add_i32 s9, s9, s5 -; GFX9-NEXT: s_mul_hi_u32 s5, s3, s9 -; GFX9-NEXT: s_mul_i32 s9, s5, s8 -; GFX9-NEXT: s_sub_i32 s3, s3, s9 -; GFX9-NEXT: s_add_i32 s12, s5, 1 -; GFX9-NEXT: s_sub_i32 s9, s3, s8 -; GFX9-NEXT: s_cmp_ge_u32 s3, s8 -; GFX9-NEXT: s_cselect_b32 s5, s12, s5 -; GFX9-NEXT: s_cselect_b32 s3, s9, s3 -; GFX9-NEXT: s_add_i32 s9, s5, 1 
-; GFX9-NEXT: s_cmp_ge_u32 s3, s8 -; GFX9-NEXT: s_cselect_b32 s3, s9, s5 -; GFX9-NEXT: s_ashr_i32 s5, s10, 31 -; GFX9-NEXT: s_add_i32 s8, s10, s5 -; GFX9-NEXT: s_xor_b32 s8, s8, s5 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX9-NEXT: s_ashr_i32 s9, s6, 31 -; GFX9-NEXT: s_xor_b32 s3, s3, s4 -; GFX9-NEXT: s_add_i32 s6, s6, s9 +; GFX9-NEXT: s_ashr_i32 s12, s9, 31 +; GFX9-NEXT: s_add_i32 s9, s9, s12 +; GFX9-NEXT: s_xor_b32 s9, s9, s12 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s5, s9, s5 -; GFX9-NEXT: s_sub_i32 s3, s3, s4 -; GFX9-NEXT: s_xor_b32 s4, s6, s9 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX9-NEXT: s_sub_i32 s14, 0, s3 +; GFX9-NEXT: s_ashr_i32 s8, s4, 31 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s6, 0, s8 -; GFX9-NEXT: v_readfirstlane_b32 s9, v0 -; GFX9-NEXT: s_mul_i32 s6, s6, s9 -; GFX9-NEXT: s_mul_hi_u32 s6, s9, s6 -; GFX9-NEXT: s_add_i32 s9, s9, s6 -; GFX9-NEXT: s_mul_hi_u32 s6, s4, s9 -; GFX9-NEXT: s_mul_i32 s9, s6, s8 -; GFX9-NEXT: s_sub_i32 s4, s4, s9 -; GFX9-NEXT: s_add_i32 s10, s6, 1 -; GFX9-NEXT: s_sub_i32 s9, s4, s8 -; GFX9-NEXT: s_cmp_ge_u32 s4, s8 -; GFX9-NEXT: s_cselect_b32 s6, s10, s6 -; GFX9-NEXT: s_cselect_b32 s4, s9, s4 -; GFX9-NEXT: s_add_i32 s9, s6, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s8 -; GFX9-NEXT: s_cselect_b32 s4, s9, s6 -; GFX9-NEXT: s_ashr_i32 s6, s11, 31 -; GFX9-NEXT: s_add_i32 s8, s11, s6 -; GFX9-NEXT: s_xor_b32 s8, s8, s6 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_ashr_i32 s2, s7, 31 -; GFX9-NEXT: s_xor_b32 s4, s4, s5 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX9-NEXT: s_add_i32 s7, s7, s2 -; GFX9-NEXT: s_xor_b32 s6, s2, s6 -; GFX9-NEXT: s_sub_i32 s4, s4, s5 +; GFX9-NEXT: s_add_i32 s4, s4, s8 +; GFX9-NEXT: s_xor_b32 s4, s4, s8 +; GFX9-NEXT: v_mul_lo_u32 v2, s14, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: s_xor_b32 s2, s7, s2 -; GFX9-NEXT: s_sub_i32 s5, 0, s8 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_readfirstlane_b32 s7, v1 -; GFX9-NEXT: s_mul_i32 s5, s5, s7 -; GFX9-NEXT: s_mul_hi_u32 s5, s7, s5 -; GFX9-NEXT: s_add_i32 s7, s7, s5 -; GFX9-NEXT: s_mul_hi_u32 s5, s2, s7 -; GFX9-NEXT: s_mul_i32 s7, s5, s8 -; GFX9-NEXT: s_sub_i32 s2, s2, s7 -; GFX9-NEXT: s_add_i32 s9, s5, 1 -; GFX9-NEXT: s_sub_i32 s7, s2, s8 -; GFX9-NEXT: s_cmp_ge_u32 s2, s8 -; GFX9-NEXT: s_cselect_b32 s5, s9, s5 -; GFX9-NEXT: s_cselect_b32 s2, s7, s2 -; GFX9-NEXT: s_add_i32 s7, s5, 1 -; GFX9-NEXT: s_cmp_ge_u32 s2, s8 -; GFX9-NEXT: s_cselect_b32 s2, s7, s5 -; GFX9-NEXT: s_xor_b32 s2, s2, s6 -; GFX9-NEXT: s_sub_i32 s2, s2, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: s_sub_i32 s14, 0, s9 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX9-NEXT: s_ashr_i32 s13, s5, 31 +; GFX9-NEXT: v_mul_lo_u32 v3, s14, v1 +; GFX9-NEXT: s_add_i32 s5, s5, s13 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_mul_hi_u32 v2, v1, v3 +; GFX9-NEXT: s_xor_b32 s5, s5, s13 +; GFX9-NEXT: s_xor_b32 s2, s8, s2 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s3 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX9-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX9-NEXT: s_ashr_i32 s3, 
s10, 31 +; GFX9-NEXT: s_add_i32 s4, s10, s3 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: s_xor_b32 s4, s4, s3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s4 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s9 +; GFX9-NEXT: v_add_u32_e32 v5, 1, v1 +; GFX9-NEXT: s_ashr_i32 s8, s11, 31 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX9-NEXT: v_sub_u32_e32 v2, s5, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_subrev_u32_e32 v5, s9, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX9-NEXT: s_sub_i32 s5, 0, s4 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 +; GFX9-NEXT: v_mul_lo_u32 v2, s5, v3 +; GFX9-NEXT: s_add_i32 s9, s11, s8 +; GFX9-NEXT: v_add_u32_e32 v5, 1, v1 +; GFX9-NEXT: s_xor_b32 s9, s9, s8 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_mul_hi_u32 v2, v3, v2 +; GFX9-NEXT: v_cvt_f32_u32_e32 v5, s9 +; GFX9-NEXT: s_ashr_i32 s5, s6, 31 +; GFX9-NEXT: s_add_i32 s6, s6, s5 +; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v5 +; GFX9-NEXT: s_xor_b32 s6, s6, s5 +; GFX9-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_subrev_u32_e32 v0, s2, v0 +; GFX9-NEXT: s_xor_b32 s2, s13, s12 +; GFX9-NEXT: v_mul_lo_u32 v5, v2, s4 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX9-NEXT: v_subrev_u32_e32 v1, s2, v1 +; GFX9-NEXT: s_xor_b32 s2, s5, s3 +; GFX9-NEXT: s_sub_i32 s3, 0, s9 +; GFX9-NEXT: v_mul_lo_u32 v7, s3, v3 +; GFX9-NEXT: v_sub_u32_e32 v5, s6, v5 +; GFX9-NEXT: v_add_u32_e32 v6, 1, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX9-NEXT: v_subrev_u32_e32 v6, s4, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX9-NEXT: v_mul_hi_u32 v6, v3, v7 +; GFX9-NEXT: s_ashr_i32 s3, s7, 31 +; GFX9-NEXT: s_add_i32 s5, s7, s3 +; GFX9-NEXT: s_xor_b32 s5, s5, s3 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v6 +; GFX9-NEXT: v_mul_hi_u32 v3, s5, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v5 +; GFX9-NEXT: v_add_u32_e32 v6, 1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, v3, s9 +; GFX9-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX9-NEXT: v_xor_b32_e32 v2, s2, v2 +; GFX9-NEXT: v_subrev_u32_e32 v2, s2, v2 +; GFX9-NEXT: v_sub_u32_e32 v5, s5, v5 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX9-NEXT: v_subrev_u32_e32 v6, s9, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX9-NEXT: v_add_u32_e32 v6, 1, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v5 +; GFX9-NEXT: s_xor_b32 s2, s3, s8 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX9-NEXT: v_xor_b32_e32 v3, s2, v3 +; GFX9-NEXT: v_subrev_u32_e32 v3, s2, v3 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm %r = sdiv <4 x i32> %x, %y @@ -2278,117 +2216,109 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s2, s8, 31 -; GFX6-NEXT: s_add_i32 s3, s8, s2 -; GFX6-NEXT: s_xor_b32 s2, s3, s2 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX6-NEXT: s_sub_i32 s3, 0, s2 +; GFX6-NEXT: s_add_i32 s8, s8, s2 +; GFX6-NEXT: s_xor_b32 s8, s8, s2 +; GFX6-NEXT: 
v_cvt_f32_u32_e32 v0, s8 +; GFX6-NEXT: s_ashr_i32 s13, s9, 31 +; GFX6-NEXT: s_add_i32 s9, s9, s13 +; GFX6-NEXT: s_xor_b32 s9, s9, s13 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_sub_i32 s14, 0, s8 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX6-NEXT: s_ashr_i32 s12, s4, 31 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 -; GFX6-NEXT: s_ashr_i32 s3, s4, 31 -; GFX6-NEXT: s_add_i32 s4, s4, s3 -; GFX6-NEXT: s_xor_b32 s4, s4, s3 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: s_add_i32 s4, s4, s12 +; GFX6-NEXT: s_xor_b32 s4, s4, s12 +; GFX6-NEXT: v_mul_lo_u32 v2, s14, v0 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX6-NEXT: s_sub_i32 s14, 0, s9 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: s_ashr_i32 s13, s5, 31 +; GFX6-NEXT: s_add_i32 s5, s5, s13 +; GFX6-NEXT: s_xor_b32 s5, s5, s13 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_readfirstlane_b32 s8, v0 -; GFX6-NEXT: s_mul_i32 s8, s8, s2 -; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: s_sub_i32 s8, s4, s2 -; GFX6-NEXT: s_cmp_ge_u32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s4, s8, s4 -; GFX6-NEXT: s_sub_i32 s8, s4, s2 -; GFX6-NEXT: s_cmp_ge_u32 s4, s2 -; GFX6-NEXT: s_cselect_b32 s2, s8, s4 -; GFX6-NEXT: s_ashr_i32 s4, s9, 31 -; GFX6-NEXT: s_add_i32 s8, s9, s4 -; GFX6-NEXT: s_xor_b32 s4, s8, s4 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s4 -; GFX6-NEXT: s_sub_i32 s8, 0, s4 -; GFX6-NEXT: s_xor_b32 s2, s2, s3 -; GFX6-NEXT: s_sub_i32 s9, s2, s3 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s8, v0 -; GFX6-NEXT: s_ashr_i32 s8, s5, 31 -; GFX6-NEXT: s_add_i32 s5, s5, s8 -; GFX6-NEXT: s_xor_b32 s5, s5, s8 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s4 -; GFX6-NEXT: s_sub_i32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s4 -; GFX6-NEXT: s_cmp_ge_u32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s4 -; GFX6-NEXT: s_cmp_ge_u32 s2, s4 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_ashr_i32 s3, s10, 31 -; GFX6-NEXT: s_add_i32 s4, s10, s3 -; GFX6-NEXT: s_xor_b32 s3, s4, s3 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX6-NEXT: s_sub_i32 s4, 0, s3 -; GFX6-NEXT: s_xor_b32 s2, s2, s8 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0 -; GFX6-NEXT: s_ashr_i32 s4, s6, 31 -; GFX6-NEXT: s_add_i32 s5, s6, s4 -; GFX6-NEXT: s_xor_b32 s5, s5, s4 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_sub_i32 s6, s2, s8 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s3 -; GFX6-NEXT: s_sub_i32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s5, s2, s3 -; GFX6-NEXT: s_cmp_ge_u32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s5, s2, s3 -; GFX6-NEXT: s_cmp_ge_u32 s2, s3 -; GFX6-NEXT: s_cselect_b32 s5, s5, s2 -; GFX6-NEXT: s_ashr_i32 s2, s11, 31 -; GFX6-NEXT: s_add_i32 s3, s11, s2 -; GFX6-NEXT: s_xor_b32 s8, s3, s2 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX6-NEXT: s_sub_i32 s10, 0, s8 -; GFX6-NEXT: 
s_xor_b32 s5, s5, s4 -; GFX6-NEXT: s_sub_i32 s4, s5, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: v_mul_lo_u32 v2, s14, v1 ; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v0 -; GFX6-NEXT: v_mov_b32_e32 v0, s9 -; GFX6-NEXT: s_ashr_i32 s9, s7, 31 -; GFX6-NEXT: s_add_i32 s7, s7, s9 -; GFX6-NEXT: v_mul_lo_u32 v2, s10, v1 -; GFX6-NEXT: s_xor_b32 s7, s7, s9 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s8 ; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s7, v1 -; GFX6-NEXT: v_mov_b32_e32 v1, s6 -; GFX6-NEXT: v_readfirstlane_b32 s5, v2 -; GFX6-NEXT: s_mul_i32 s5, s5, s8 -; GFX6-NEXT: s_sub_i32 s5, s7, s5 -; GFX6-NEXT: s_sub_i32 s6, s5, s8 -; GFX6-NEXT: s_cmp_ge_u32 s5, s8 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 -; GFX6-NEXT: s_sub_i32 s6, s5, s8 -; GFX6-NEXT: s_cmp_ge_u32 s5, s8 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 -; GFX6-NEXT: s_xor_b32 s5, s5, s9 -; GFX6-NEXT: s_sub_i32 s5, s5, s9 -; GFX6-NEXT: v_mov_b32_e32 v2, s4 -; GFX6-NEXT: v_mov_b32_e32 v3, s5 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX6-NEXT: s_ashr_i32 s4, s10, 31 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: s_add_i32 s8, s10, s4 +; GFX6-NEXT: s_xor_b32 s4, s8, s4 +; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s4 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s12, v0 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s9 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s9, v1 +; GFX6-NEXT: s_sub_i32 s5, 0, s4 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_mul_lo_u32 v4, s5, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s9, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v4 +; GFX6-NEXT: s_ashr_i32 s8, s11, 31 +; GFX6-NEXT: s_add_i32 s9, s11, s8 +; GFX6-NEXT: s_ashr_i32 s5, s6, 31 +; GFX6-NEXT: s_xor_b32 s8, s9, s8 +; GFX6-NEXT: s_add_i32 s6, s6, s5 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_cvt_f32_u32_e32 v3, s8 +; GFX6-NEXT: s_xor_b32 s6, s6, s5 +; GFX6-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, s13, v1 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s13, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v2, s4 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s6, v2 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 +; GFX6-NEXT: s_sub_i32 s6, 0, s8 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_mul_lo_u32 v4, s6, v3 +; GFX6-NEXT: s_ashr_i32 s6, s7, 31 +; GFX6-NEXT: s_add_i32 s7, s7, s6 +; GFX6-NEXT: s_xor_b32 s7, s7, s6 +; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GFX6-NEXT: v_mul_hi_u32 v3, s7, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 
v2, v5, vcc +; GFX6-NEXT: v_xor_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, s8 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s5, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s7, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s8, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s8, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s6, v3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3063,70 +2993,70 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s9 ; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; GFX6-NEXT: s_ashr_i32 s6, s6, 16 ; GFX6-NEXT: s_ashr_i32 s8, s8, 30 -; GFX6-NEXT: s_or_b32 s10, s8, 1 +; GFX6-NEXT: s_or_b32 s8, s8, 1 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s8, s10, 0 -; GFX6-NEXT: s_ashr_i32 s6, s6, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GFX6-NEXT: s_ashr_i32 s4, s4, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s4 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v0 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s6 +; GFX6-NEXT: v_mov_b32_e32 v3, s8 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: s_ashr_i32 s4, s4, 16 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v1 ; GFX6-NEXT: s_xor_b32 s4, s4, s6 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_sext_i32_i16 s6, s7 -; GFX6-NEXT: v_mul_f32_e32 v3, v1, v3 +; GFX6-NEXT: s_or_b32 s4, s4, 1 +; GFX6-NEXT: v_mul_f32_e32 v3, v2, v3 ; GFX6-NEXT: v_trunc_f32_e32 v3, v3 -; GFX6-NEXT: v_mad_f32 v1, -v3, v0, v1 +; GFX6-NEXT: v_mad_f32 v2, -v3, v1, v2 +; GFX6-NEXT: v_mov_b32_e32 v4, s4 +; GFX6-NEXT: s_sext_i32_i16 s4, s7 ; GFX6-NEXT: v_cvt_i32_f32_e32 v3, v3 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, s8, v2 -; GFX6-NEXT: s_or_b32 s4, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, s4, v3 -; GFX6-NEXT: s_sext_i32_i16 s4, s5 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v0 -; GFX6-NEXT: s_xor_b32 s4, s4, s6 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v1| +; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX6-NEXT: s_sext_i32_i16 s6, s5 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v1, v3 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s6 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v2 +; GFX6-NEXT: s_xor_b32 s4, s6, s4 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 ; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v4, v1, v4 ; GFX6-NEXT: v_trunc_f32_e32 v4, v4 -; GFX6-NEXT: v_mad_f32 v1, -v4, v0, v1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX6-NEXT: v_mad_f32 v1, -v4, v2, v1 +; GFX6-NEXT: v_mov_b32_e32 v5, s4 +; GFX6-NEXT: s_ashr_i32 s4, s7, 16 ; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: s_ashr_i32 s6, s7, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GFX6-NEXT: 
v_add_i32_e32 v1, vcc, s4, v4 -; GFX6-NEXT: s_ashr_i32 s4, s5, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v0 -; GFX6-NEXT: s_xor_b32 s4, s4, s6 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v2| +; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GFX6-NEXT: s_ashr_i32 s5, s5, 16 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s5 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v2 +; GFX6-NEXT: s_xor_b32 s4, s5, s4 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v5, v4, v5 ; GFX6-NEXT: v_trunc_f32_e32 v5, v5 -; GFX6-NEXT: v_mad_f32 v4, -v5, v0, v4 +; GFX6-NEXT: v_mad_f32 v4, -v5, v2, v4 ; GFX6-NEXT: v_cvt_i32_f32_e32 v5, v5 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, |v0| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_mov_b32_e32 v6, s4 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v4|, |v2| +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3334,80 +3264,80 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s8, s8, 30 -; GFX6-NEXT: s_or_b32 s10, s8, 1 +; GFX6-NEXT: s_or_b32 s8, s8, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s8 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s8, s10, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s8, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: s_ashr_i32 s9, s6, 16 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s6 -; GFX6-NEXT: s_ashr_i32 s8, s6, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s8 -; GFX6-NEXT: s_lshr_b32 s10, s4, 16 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s9 +; GFX6-NEXT: s_lshr_b32 s8, s4, 16 +; GFX6-NEXT: s_lshr_b32 s6, s6, 16 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v1 -; GFX6-NEXT: s_xor_b32 s4, s4, s8 +; GFX6-NEXT: s_xor_b32 s4, s4, s9 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v3, v2, v3 ; GFX6-NEXT: v_trunc_f32_e32 v3, v3 ; GFX6-NEXT: v_mad_f32 v2, -v3, v1, v2 ; GFX6-NEXT: v_cvt_i32_f32_e32 v3, v3 -; GFX6-NEXT: s_or_b32 s4, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v2|, |v1| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v3 +; GFX6-NEXT: v_mov_b32_e32 v4, s4 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v1| +; GFX6-NEXT: 
v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: s_sext_i32_i16 s4, s7 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s6 ; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 ; GFX6-NEXT: s_sext_i32_i16 s6, s5 ; GFX6-NEXT: s_xor_b32 s4, s6, s4 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s10, v1 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s8, v1 ; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s6 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v2 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 ; GFX6-NEXT: s_or_b32 s4, s4, 1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_mov_b32_e32 v5, s4 ; GFX6-NEXT: v_mul_f32_e32 v4, v1, v4 ; GFX6-NEXT: v_trunc_f32_e32 v4, v4 ; GFX6-NEXT: v_mad_f32 v1, -v4, v2, v1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v2| ; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: s_ashr_i32 s6, s7, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s6 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v4 +; GFX6-NEXT: s_ashr_i32 s4, s7, 16 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v2| +; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s7 -; GFX6-NEXT: s_lshr_b32 s8, s7, 16 +; GFX6-NEXT: s_lshr_b32 s6, s7, 16 ; GFX6-NEXT: s_ashr_i32 s7, s5, 16 ; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s7 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v2 -; GFX6-NEXT: s_xor_b32 s6, s7, s6 -; GFX6-NEXT: s_ashr_i32 s6, s6, 30 -; GFX6-NEXT: s_lshr_b32 s4, s5, 16 +; GFX6-NEXT: s_xor_b32 s4, s7, s4 +; GFX6-NEXT: s_ashr_i32 s4, s4, 30 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v5, v4, v5 ; GFX6-NEXT: v_trunc_f32_e32 v5, v5 ; GFX6-NEXT: v_mad_f32 v4, -v5, v2, v4 ; GFX6-NEXT: v_cvt_i32_f32_e32 v5, v5 -; GFX6-NEXT: s_or_b32 s9, s6, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[6:7], |v4|, |v2| -; GFX6-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GFX6-NEXT: s_cselect_b32 s6, s9, 0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, s6, v5 -; GFX6-NEXT: v_mul_lo_u32 v2, v2, s8 +; GFX6-NEXT: v_mov_b32_e32 v6, s4 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v4|, |v2| +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GFX6-NEXT: v_mul_lo_u32 v2, v2, s6 +; GFX6-NEXT: s_lshr_b32 s4, s5, 16 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3688,15 +3618,15 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: s_xor_b32 s4, s4, s5 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: s_or_b32 s4, s4, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s4 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX6-NEXT: 
buffer_store_byte v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3766,22 +3696,22 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s2, s4, 0x30008 ; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s2 -; GFX6-NEXT: s_bfe_i32 s3, s4, 0x30000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s3 -; GFX6-NEXT: s_xor_b32 s2, s3, s2 +; GFX6-NEXT: s_bfe_i32 s5, s4, 0x30000 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s5 +; GFX6-NEXT: s_xor_b32 s2, s5, s2 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s2, s2, 30 -; GFX6-NEXT: s_lshr_b32 s5, s4, 8 -; GFX6-NEXT: s_or_b32 s6, s2, 1 +; GFX6-NEXT: s_or_b32 s2, s2, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s2 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[2:3], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GFX6-NEXT: s_cselect_b32 s2, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, s5 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: s_lshr_b32 s3, s4, 8 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s3 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 @@ -4254,54 +4184,54 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s9 ; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; GFX6-NEXT: s_ashr_i32 s6, s6, 16 ; GFX6-NEXT: s_ashr_i32 s8, s8, 30 -; GFX6-NEXT: s_or_b32 s10, s8, 1 +; GFX6-NEXT: s_or_b32 s8, s8, 1 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s8, s10, 0 -; GFX6-NEXT: s_ashr_i32 s6, s6, 16 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s6 +; GFX6-NEXT: v_mov_b32_e32 v3, s8 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GFX6-NEXT: s_ashr_i32 s4, s4, 16 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, s8, v2 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v0 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v1 ; GFX6-NEXT: s_xor_b32 s4, s4, s6 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_sext_i32_i16 s6, s7 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v3, v2, v3 ; GFX6-NEXT: v_trunc_f32_e32 v3, v3 -; GFX6-NEXT: v_mad_f32 v2, -v3, v0, v2 +; GFX6-NEXT: v_mad_f32 v2, -v3, v1, v2 +; GFX6-NEXT: v_mov_b32_e32 v4, s4 +; GFX6-NEXT: s_sext_i32_i16 s4, s7 ; GFX6-NEXT: v_cvt_i32_f32_e32 v3, v3 -; GFX6-NEXT: s_or_b32 s4, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v2|, |v0| -; GFX6-NEXT: v_cvt_f32_i32_e32 v0, s6 -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, s4, v3 -; GFX6-NEXT: s_sext_i32_i16 s4, s5 -; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v0 -; GFX6-NEXT: s_xor_b32 s4, s4, s6 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v1| +; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX6-NEXT: s_sext_i32_i16 s5, s5 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GFX6-NEXT: 
v_cvt_f32_i32_e32 v3, s5 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v2 +; GFX6-NEXT: s_xor_b32 s4, s5, s4 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v4, v3, v4 ; GFX6-NEXT: v_trunc_f32_e32 v4, v4 -; GFX6-NEXT: v_mad_f32 v3, -v4, v0, v3 +; GFX6-NEXT: v_mad_f32 v3, -v4, v2, v3 ; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, |v0| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 -; GFX6-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; GFX6-NEXT: s_endpgm +; GFX6-NEXT: v_mov_b32_e32 v5, s4 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: buffer_store_short v2, off, s[0:3], 0 offset:4 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_v3i16: ; GFX9: ; %bb.0: @@ -4464,37 +4394,37 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GFX6-NEXT: s_ashr_i32 s8, s8, 30 -; GFX6-NEXT: s_or_b32 s10, s8, 1 +; GFX6-NEXT: s_or_b32 s8, s8, 1 +; GFX6-NEXT: v_mov_b32_e32 v3, s8 ; GFX6-NEXT: v_mul_f32_e32 v2, v1, v2 ; GFX6-NEXT: v_trunc_f32_e32 v2, v2 ; GFX6-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v1|, |v0| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s8, s10, 0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s8, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: s_ashr_i32 s9, s6, 16 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, s6 -; GFX6-NEXT: s_ashr_i32 s8, s6, 16 -; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s8 -; GFX6-NEXT: s_lshr_b32 s10, s4, 16 +; GFX6-NEXT: v_cvt_f32_i32_e32 v1, s9 +; GFX6-NEXT: s_lshr_b32 s8, s4, 16 +; GFX6-NEXT: s_lshr_b32 s6, s6, 16 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: s_ashr_i32 s4, s4, 16 ; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v1 -; GFX6-NEXT: s_xor_b32 s4, s4, s8 +; GFX6-NEXT: s_xor_b32 s4, s4, s9 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v3, v2, v3 ; GFX6-NEXT: v_trunc_f32_e32 v3, v3 ; GFX6-NEXT: v_mad_f32 v2, -v3, v1, v2 ; GFX6-NEXT: v_cvt_i32_f32_e32 v3, v3 -; GFX6-NEXT: s_or_b32 s4, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v2|, |v1| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v3 +; GFX6-NEXT: v_mov_b32_e32 v4, s4 ; GFX6-NEXT: s_sext_i32_i16 s4, s7 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v1| ; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, s6 ; GFX6-NEXT: s_sext_i32_i16 s6, s5 ; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s6 @@ -4506,12 +4436,12 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: 
v_trunc_f32_e32 v4, v4 ; GFX6-NEXT: v_mad_f32 v3, -v4, v2, v3 ; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[8:9], |v3|, |v2| -; GFX6-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX6-NEXT: s_cselect_b32 s4, s4, 0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, s4, v4 +; GFX6-NEXT: v_mov_b32_e32 v5, s4 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_mul_lo_u32 v2, v2, s7 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s10, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s8, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s5, v2 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -4896,7 +4826,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: s_mov_b32 s0, s4 ; GFX6-NEXT: s_lshr_b32 s4, s6, 15 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v1 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, v2, v0 ; GFX6-NEXT: v_and_b32_e32 v3, 0x7fff, v3 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 30 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7fff, v6 @@ -5067,54 +4997,54 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s5 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v2 ; GFX6-NEXT: s_xor_b32 s4, s5, s4 -; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_ashr_i32 s4, s4, 30 +; GFX6-NEXT: s_or_b32 s4, s4, 1 ; GFX6-NEXT: v_mul_f32_e32 v4, v3, v4 ; GFX6-NEXT: v_trunc_f32_e32 v4, v4 ; GFX6-NEXT: v_mad_f32 v3, -v4, v2, v3 -; GFX6-NEXT: v_alignbit_b32 v0, s7, v0, 30 -; GFX6-NEXT: s_or_b32 s7, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, |v2| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GFX6-NEXT: v_mov_b32_e32 v5, s4 +; GFX6-NEXT: s_bfe_i32 s4, s8, 0xf000f ; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: s_cselect_b32 s4, s7, 0 -; GFX6-NEXT: s_bfe_i32 s5, s8, 0xf000f -; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s5 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, s4, v4 -; GFX6-NEXT: s_bfe_i32 s4, s6, 0xf000f -; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s4 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| +; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: s_bfe_i32 s5, s6, 0xf000f +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s5 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v3 ; GFX6-NEXT: v_mov_b32_e32 v1, s8 ; GFX6-NEXT: v_alignbit_b32 v1, s9, v1, 30 -; GFX6-NEXT: s_xor_b32 s4, s4, s5 +; GFX6-NEXT: s_xor_b32 s4, s5, s4 ; GFX6-NEXT: v_mul_f32_e32 v5, v4, v5 ; GFX6-NEXT: v_trunc_f32_e32 v5, v5 -; GFX6-NEXT: s_ashr_i32 s4, s4, 30 -; GFX6-NEXT: v_mad_f32 v4, -v5, v2, v4 +; GFX6-NEXT: v_mad_f32 v4, -v5, v3, v4 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 15 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: s_ashr_i32 s4, s4, 30 ; GFX6-NEXT: v_cvt_i32_f32_e32 v5, v5 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, |v2| -; GFX6-NEXT: v_cvt_f32_i32_e32 v2, v1 -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v4|, |v3| +; GFX6-NEXT: v_cvt_f32_i32_e32 v4, v1 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 +; GFX6-NEXT: s_or_b32 s4, s4, 1 +; GFX6-NEXT: v_alignbit_b32 v0, s7, v0, 30 +; GFX6-NEXT: v_mov_b32_e32 v6, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 15 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, s4, v5 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GFX6-NEXT: v_cvt_f32_i32_e32 v5, v0 -; GFX6-NEXT: 
v_rcp_iflag_f32_e32 v6, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v4 ; GFX6-NEXT: v_xor_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 30, v0 ; GFX6-NEXT: v_or_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, v5, v6 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mad_f32 v5, -v1, v2, v5 +; GFX6-NEXT: v_mad_f32 v5, -v1, v4, v5 ; GFX6-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v2| +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v4| ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX6-NEXT: v_and_b32_e32 v2, 0x7fff, v3 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_and_b32_e32 v3, 0x7fff, v4 +; GFX6-NEXT: v_and_b32_e32 v3, 0x7fff, v3 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 30 +; GFX6-NEXT: v_and_b32_e32 v2, 0x7fff, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 15, v3 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 @@ -5286,53 +5216,52 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s0, s4 -; GFX6-NEXT: s_bfe_i32 s4, s8, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s4 ; GFX6-NEXT: s_mov_b32 s1, s5 -; GFX6-NEXT: s_bfe_i32 s5, s6, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s5 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v4 -; GFX6-NEXT: s_xor_b32 s4, s5, s4 -; GFX6-NEXT: v_mov_b32_e32 v0, s6 +; GFX6-NEXT: s_bfe_i32 s5, s8, 0xf0000 +; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s5 ; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: v_alignbit_b32 v2, s9, v2, 30 +; GFX6-NEXT: s_bfe_i32 s9, s6, 0xf0000 +; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s9 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v4 +; GFX6-NEXT: s_xor_b32 s5, s9, s5 +; GFX6-NEXT: s_ashr_i32 s5, s5, 30 +; GFX6-NEXT: s_or_b32 s5, s5, 1 ; GFX6-NEXT: v_mul_f32_e32 v6, v5, v6 ; GFX6-NEXT: v_trunc_f32_e32 v6, v6 -; GFX6-NEXT: s_ashr_i32 s4, s4, 30 ; GFX6-NEXT: v_mad_f32 v5, -v6, v4, v5 ; GFX6-NEXT: v_cvt_i32_f32_e32 v6, v6 -; GFX6-NEXT: v_alignbit_b32 v0, s7, v0, 30 -; GFX6-NEXT: s_lshr_b32 s7, s6, 15 -; GFX6-NEXT: v_alignbit_b32 v2, s9, v2, 30 -; GFX6-NEXT: s_lshr_b32 s9, s8, 15 -; GFX6-NEXT: s_or_b32 s10, s4, 1 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, |v4| -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX6-NEXT: s_cselect_b32 s4, s10, 0 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, s4, v6 -; GFX6-NEXT: s_bfe_i32 s4, s8, 0xf000f -; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s4 -; GFX6-NEXT: s_bfe_i32 s5, s6, 0xf000f -; GFX6-NEXT: v_cvt_f32_i32_e32 v6, s5 +; GFX6-NEXT: v_mov_b32_e32 v7, s5 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v4| +; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; GFX6-NEXT: v_mul_lo_u32 v4, v4, s8 +; GFX6-NEXT: s_bfe_i32 s5, s8, 0xf000f +; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s5 +; GFX6-NEXT: s_mov_b32 s0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 +; GFX6-NEXT: s_lshr_b32 s4, s6, 15 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s6, v4 +; GFX6-NEXT: s_bfe_i32 s6, s6, 0xf000f +; GFX6-NEXT: v_cvt_f32_i32_e32 v6, s6 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v7, v5 -; GFX6-NEXT: s_xor_b32 s4, s5, s4 ; GFX6-NEXT: v_and_b32_e32 v3, 0x7fff, v2 -; GFX6-NEXT: s_ashr_i32 s4, s4, 30 +; GFX6-NEXT: s_xor_b32 s5, s6, s5 +; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 15 ; GFX6-NEXT: v_mul_f32_e32 v7, v6, v7 ; GFX6-NEXT: v_trunc_f32_e32 v7, v7 ; GFX6-NEXT: v_mad_f32 v6, -v7, v5, v6 -; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 15 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s6, v4 -; GFX6-NEXT: s_or_b32 s6, s4, 1 +; GFX6-NEXT: 
s_ashr_i32 s5, s5, 30 ; GFX6-NEXT: v_cvt_i32_f32_e32 v7, v7 -; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v6|, |v5| +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v6|, |v5| ; GFX6-NEXT: v_cvt_f32_i32_e32 v6, v2 -; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GFX6-NEXT: s_or_b32 s5, s5, 1 +; GFX6-NEXT: v_alignbit_b32 v0, s7, v0, 30 +; GFX6-NEXT: v_mov_b32_e32 v8, s5 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7fff, v0 -; GFX6-NEXT: s_cselect_b32 s4, s6, 0 +; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v8, vcc ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 15 -; GFX6-NEXT: v_add_i32_e32 v5, vcc, s4, v7 +; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; GFX6-NEXT: v_cvt_f32_i32_e32 v7, v0 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v6 ; GFX6-NEXT: v_xor_b32_e32 v0, v0, v2 @@ -5343,11 +5272,12 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_mad_f32 v7, -v2, v6, v7 ; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v7|, |v6| +; GFX6-NEXT: s_lshr_b32 s7, s8, 15 ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GFX6-NEXT: v_mul_lo_u32 v5, v5, s9 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_lo_u32 v5, v5, s7 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, v3 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s7, v5 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v5 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7fff, v2 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v1, v0 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 30 @@ -5725,104 +5655,95 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s2, 0x1000, s6 ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX6-NEXT: s_sub_i32 s3, 0, s2 -; GFX6-NEXT: s_lshl_b32 s6, 0x1000, s7 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s6 +; GFX6-NEXT: s_lshl_b32 s3, 0x1000, s7 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s3 +; GFX6-NEXT: s_sub_i32 s0, 0, s2 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s3, v0 -; GFX6-NEXT: s_mul_i32 s3, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s4, s3 -; GFX6-NEXT: s_sub_i32 s4, s3, s2 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX6-NEXT: s_sub_i32 s4, 0, s6 -; GFX6-NEXT: v_mul_lo_u32 v3, s4, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v2, s0, v0 +; GFX6-NEXT: s_sub_i32 s0, 0, s3 +; GFX6-NEXT: v_mul_lo_u32 v3, s0, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[2:3] +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v1 -; GFX6-NEXT: s_mul_i32 s0, s0, s6 -; GFX6-NEXT: s_sub_i32 s0, s5, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s6 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v1 -; GFX6-NEXT: s_cmp_ge_u32 s0, s6 -; 
GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v1 -; GFX6-NEXT: s_cmp_ge_u32 s0, s6 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_mul_lo_u32 v2, v0, s2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, s3 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s5, v4 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s3, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: udiv_v2i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b32 s3, 0x1000, s6 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX9-NEXT: s_lshl_b32 s2, 0x1000, s7 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX9-NEXT: s_sub_i32 s6, 0, s3 +; GFX9-NEXT: s_lshl_b32 s6, 0x1000, s6 +; GFX9-NEXT: s_lshl_b32 s7, 0x1000, s7 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX9-NEXT: s_sub_i32 s2, 0, s6 +; GFX9-NEXT: s_sub_i32 s3, 0, s7 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_readfirstlane_b32 s7, v0 -; GFX9-NEXT: s_mul_i32 s6, s6, s7 -; GFX9-NEXT: s_mul_hi_u32 s6, s7, s6 -; GFX9-NEXT: s_add_i32 s7, s7, s6 -; GFX9-NEXT: s_mul_hi_u32 s6, s4, s7 -; GFX9-NEXT: s_mul_i32 s7, s6, s3 -; GFX9-NEXT: s_sub_i32 s4, s4, s7 -; GFX9-NEXT: s_add_i32 s9, s6, 1 -; GFX9-NEXT: s_sub_i32 s7, s4, s3 -; GFX9-NEXT: s_cmp_ge_u32 s4, s3 -; GFX9-NEXT: s_cselect_b32 s6, s9, s6 -; GFX9-NEXT: s_cselect_b32 s4, s7, s4 -; GFX9-NEXT: s_add_i32 s7, s6, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s3 -; GFX9-NEXT: v_readfirstlane_b32 s8, v1 -; GFX9-NEXT: s_cselect_b32 s3, s7, s6 -; GFX9-NEXT: s_sub_i32 s4, 0, s2 -; GFX9-NEXT: s_mul_i32 s4, s4, s8 -; GFX9-NEXT: s_mul_hi_u32 s4, s8, s4 -; GFX9-NEXT: s_add_i32 s8, s8, s4 -; GFX9-NEXT: s_mul_hi_u32 s4, s5, s8 -; GFX9-NEXT: s_mul_i32 s6, s4, s2 -; GFX9-NEXT: s_sub_i32 s5, s5, s6 -; GFX9-NEXT: s_add_i32 s7, s4, 1 -; GFX9-NEXT: s_sub_i32 s6, s5, s2 -; GFX9-NEXT: s_cmp_ge_u32 s5, s2 -; GFX9-NEXT: s_cselect_b32 s4, s7, s4 -; GFX9-NEXT: s_cselect_b32 s5, s6, s5 -; GFX9-NEXT: s_add_i32 s6, s4, 1 -; GFX9-NEXT: s_cmp_ge_u32 s5, s2 -; GFX9-NEXT: s_cselect_b32 s2, s6, s4 -; GFX9-NEXT: v_mov_b32_e32 v0, s3 -; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, s3, v1 +; GFX9-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s6 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, s7 +; GFX9-NEXT: v_add_u32_e32 v5, 1, v0 +; GFX9-NEXT: v_add_u32_e32 v6, 1, v1 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 +; GFX9-NEXT: v_sub_u32_e32 v4, s5, v4 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX9-NEXT: v_subrev_u32_e32 v5, s6, v3 +; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1] +; GFX9-NEXT: v_subrev_u32_e32 v6, s7, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v5, 1, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = udiv <2 x i32> %x, %shl.y @@ -6060,49 +5981,45 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s2, 0x1000, s6 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX6-NEXT: s_sub_i32 s3, 0, s2 -; GFX6-NEXT: s_lshl_b32 s6, 0x1000, s7 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s6 +; GFX6-NEXT: s_lshl_b32 s6, 0x1000, s6 +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX6-NEXT: s_lshl_b32 s7, 0x1000, s7 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX6-NEXT: s_sub_i32 s2, 0, s6 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s3, v0 -; GFX6-NEXT: s_mul_i32 s3, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s4, s3 -; GFX6-NEXT: s_sub_i32 s4, s3, s2 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 -; GFX6-NEXT: s_sub_i32 s4, s3, s2 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s4, s4, s3 -; GFX6-NEXT: s_sub_i32 s2, 0, s6 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX6-NEXT: s_sub_i32 s2, 0, s7 +; GFX6-NEXT: v_mul_lo_u32 v3, s2, v1 ; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s7, v0 -; GFX6-NEXT: s_mul_i32 s7, s7, s6 -; GFX6-NEXT: s_sub_i32 s5, s5, s7 -; GFX6-NEXT: s_sub_i32 s7, s5, s6 -; GFX6-NEXT: s_cmp_ge_u32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s7, s5 -; GFX6-NEXT: s_sub_i32 s7, s5, s6 -; GFX6-NEXT: s_cmp_ge_u32 s5, s6 -; GFX6-NEXT: s_cselect_b32 s5, s7, s5 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; 
GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s6 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s7 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s6, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s6, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -6264,6 +6181,7 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_xor_b32 s2, s2, s9 ; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: s_xor_b32 s8, s9, s8 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0 @@ -6271,62 +6189,56 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: s_mul_i32 s0, s0, s3 -; GFX6-NEXT: s_sub_i32 s0, s2, s0 -; GFX6-NEXT: s_sub_i32 s1, s0, s3 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cselect_b32 s0, s1, s0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s0, s3 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_xor_b32 s0, s9, s8 -; GFX6-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s3, 0x1000, s3 ; GFX9-NEXT: s_ashr_i32 s4, s3, 31 ; GFX9-NEXT: s_add_i32 s3, s3, s4 ; GFX9-NEXT: s_xor_b32 s3, s3, s4 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX9-NEXT: s_sub_i32 s6, 0, s3 -; GFX9-NEXT: s_ashr_i32 s5, s2, 31 -; GFX9-NEXT: s_add_i32 s2, s2, s5 +; GFX9-NEXT: s_sub_i32 s5, 0, s3 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s2, s2, s5 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; 
GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s7, v0 -; GFX9-NEXT: s_mul_i32 s6, s6, s7 -; GFX9-NEXT: s_mul_hi_u32 s6, s7, s6 -; GFX9-NEXT: s_add_i32 s7, s7, s6 -; GFX9-NEXT: s_mul_hi_u32 s6, s2, s7 -; GFX9-NEXT: s_mul_i32 s8, s6, s3 -; GFX9-NEXT: s_sub_i32 s2, s2, s8 -; GFX9-NEXT: s_add_i32 s7, s6, 1 -; GFX9-NEXT: s_sub_i32 s8, s2, s3 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s6, s7, s6 -; GFX9-NEXT: s_cselect_b32 s2, s8, s2 -; GFX9-NEXT: s_add_i32 s7, s6, 1 -; GFX9-NEXT: s_cmp_ge_u32 s2, s3 -; GFX9-NEXT: s_cselect_b32 s2, s7, s6 -; GFX9-NEXT: s_xor_b32 s3, s5, s4 -; GFX9-NEXT: s_xor_b32 s2, s2, s3 -; GFX9-NEXT: s_sub_i32 s2, s2, s3 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-NEXT: v_mul_lo_u32 v1, s5, v0 +; GFX9-NEXT: s_ashr_i32 s5, s2, 31 +; GFX9-NEXT: s_add_i32 s2, s2, s5 +; GFX9-NEXT: s_xor_b32 s2, s2, s5 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s2, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: s_xor_b32 s2, s5, s4 +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_subrev_u32_e32 v0, s2, v0 +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm %shl.y = shl i32 4096, %y %r = sdiv i32 %x, %shl.y @@ -6534,147 +6446,137 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; ; GFX6-LABEL: sdiv_v2i32_pow2_shl_denom: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s2, 0x1000, s6 -; GFX6-NEXT: s_ashr_i32 s3, s2, 31 -; GFX6-NEXT: s_add_i32 s2, s2, s3 -; GFX6-NEXT: s_xor_b32 s2, s2, s3 +; GFX6-NEXT: s_lshl_b32 s0, 0x1000, s10 +; GFX6-NEXT: s_ashr_i32 s1, s0, 31 +; GFX6-NEXT: s_add_i32 s0, s0, s1 +; GFX6-NEXT: s_xor_b32 s2, s0, s1 ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX6-NEXT: s_sub_i32 s6, 0, s2 -; GFX6-NEXT: s_lshl_b32 s7, 0x1000, s7 +; GFX6-NEXT: s_lshl_b32 s0, 0x1000, s11 +; GFX6-NEXT: s_ashr_i32 s3, s0, 31 +; GFX6-NEXT: s_add_i32 s0, s0, s3 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_sub_i32 s11, 0, s2 +; GFX6-NEXT: s_xor_b32 s10, s0, s3 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s6, v0 -; GFX6-NEXT: s_ashr_i32 s6, s4, 31 -; GFX6-NEXT: s_add_i32 s4, s4, s6 -; GFX6-NEXT: s_xor_b32 s4, s4, s6 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_xor_b32 s6, s6, s3 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_readfirstlane_b32 s3, v0 -; GFX6-NEXT: s_mul_i32 s3, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s4, s3 -; GFX6-NEXT: s_sub_i32 s4, s3, s2 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_cselect_b32 s3, s4, s3 
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_ashr_i32 s4, s7, 31 -; GFX6-NEXT: s_add_i32 s7, s7, s4 -; GFX6-NEXT: s_xor_b32 s7, s7, s4 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s7 -; GFX6-NEXT: s_sub_i32 s8, 0, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_lo_u32 v3, s8, v2 -; GFX6-NEXT: s_ashr_i32 s8, s5, 31 -; GFX6-NEXT: s_add_i32 s5, s5, s8 -; GFX6-NEXT: s_xor_b32 s5, s5, s8 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v3 -; GFX6-NEXT: s_xor_b32 s4, s8, s4 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX6-NEXT: v_readfirstlane_b32 s6, v1 -; GFX6-NEXT: s_mul_i32 s6, s6, s7 -; GFX6-NEXT: s_sub_i32 s5, s5, s6 -; GFX6-NEXT: s_sub_i32 s6, s5, s7 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v1 -; GFX6-NEXT: s_cmp_ge_u32 s5, s7 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v1 -; GFX6-NEXT: s_cmp_ge_u32 s5, s7 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_xor_b32_e32 v1, s4, v1 -; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s4, v1 -; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX6-NEXT: s_ashr_i32 s0, s8, 31 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: s_add_i32 s8, s8, s0 +; GFX6-NEXT: v_mul_lo_u32 v2, s11, v0 +; GFX6-NEXT: s_xor_b32 s8, s8, s0 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: s_xor_b32 s11, s0, s1 +; GFX6-NEXT: s_sub_i32 s0, 0, s10 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0 +; GFX6-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX6-NEXT: v_mul_lo_u32 v3, v0, s2 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s8, v3 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s2, v3 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX6-NEXT: s_ashr_i32 s0, s9, 31 +; GFX6-NEXT: s_add_i32 s1, s9, s0 +; GFX6-NEXT: s_xor_b32 s1, s1, s0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX6-NEXT: v_mul_hi_u32 v1, s1, v1 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, v1, s10 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: s_xor_b32 s2, s0, s3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s1, v2 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v2 +; GFX6-NEXT: v_xor_b32_e32 v0, s11, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s10, v2 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s11, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_v2i32_pow2_shl_denom: ; 
GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b32 s2, 0x1000, s6 -; GFX9-NEXT: s_ashr_i32 s3, s2, 31 -; GFX9-NEXT: s_add_i32 s2, s2, s3 -; GFX9-NEXT: s_xor_b32 s2, s2, s3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX9-NEXT: s_lshl_b32 s0, 0x1000, s6 +; GFX9-NEXT: s_ashr_i32 s1, s0, 31 +; GFX9-NEXT: s_add_i32 s0, s0, s1 +; GFX9-NEXT: s_xor_b32 s0, s0, s1 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s0 ; GFX9-NEXT: s_lshl_b32 s6, 0x1000, s7 -; GFX9-NEXT: s_ashr_i32 s7, s4, 31 -; GFX9-NEXT: s_add_i32 s4, s4, s7 +; GFX9-NEXT: s_ashr_i32 s8, s6, 31 +; GFX9-NEXT: s_add_i32 s6, s6, s8 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s3, s7, s3 -; GFX9-NEXT: s_xor_b32 s4, s4, s7 -; GFX9-NEXT: s_sub_i32 s7, 0, s2 +; GFX9-NEXT: s_xor_b32 s6, s6, s8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s6 +; GFX9-NEXT: s_sub_i32 s10, 0, s0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s7, s7, s8 -; GFX9-NEXT: s_mul_hi_u32 s7, s8, s7 -; GFX9-NEXT: s_add_i32 s8, s8, s7 -; GFX9-NEXT: s_mul_hi_u32 s7, s4, s8 -; GFX9-NEXT: s_mul_i32 s8, s7, s2 -; GFX9-NEXT: s_sub_i32 s4, s4, s8 -; GFX9-NEXT: s_add_i32 s9, s7, 1 -; GFX9-NEXT: s_sub_i32 s8, s4, s2 -; GFX9-NEXT: s_cmp_ge_u32 s4, s2 -; GFX9-NEXT: s_cselect_b32 s7, s9, s7 -; GFX9-NEXT: s_cselect_b32 s4, s8, s4 -; GFX9-NEXT: s_add_i32 s8, s7, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s2 -; GFX9-NEXT: s_cselect_b32 s2, s8, s7 -; GFX9-NEXT: s_ashr_i32 s4, s6, 31 -; GFX9-NEXT: s_add_i32 s6, s6, s4 -; GFX9-NEXT: s_xor_b32 s6, s6, s4 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 -; GFX9-NEXT: s_ashr_i32 s7, s5, 31 -; GFX9-NEXT: s_xor_b32 s2, s2, s3 -; GFX9-NEXT: s_add_i32 s5, s5, s7 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s4, s7, s4 -; GFX9-NEXT: s_sub_i32 s2, s2, s3 -; GFX9-NEXT: s_xor_b32 s3, s5, s7 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s5, 0, s6 -; GFX9-NEXT: v_readfirstlane_b32 s7, v0 -; GFX9-NEXT: s_mul_i32 s5, s5, s7 -; GFX9-NEXT: s_mul_hi_u32 s5, s7, s5 -; GFX9-NEXT: s_add_i32 s7, s7, s5 -; GFX9-NEXT: s_mul_hi_u32 s5, s3, s7 -; GFX9-NEXT: s_mul_i32 s7, s5, s6 -; GFX9-NEXT: s_sub_i32 s3, s3, s7 -; GFX9-NEXT: s_add_i32 s8, s5, 1 -; GFX9-NEXT: s_sub_i32 s7, s3, s6 -; GFX9-NEXT: s_cmp_ge_u32 s3, s6 -; GFX9-NEXT: s_cselect_b32 s5, s8, s5 -; GFX9-NEXT: s_cselect_b32 s3, s7, s3 -; GFX9-NEXT: s_add_i32 s7, s5, 1 -; GFX9-NEXT: s_cmp_ge_u32 s3, s6 -; GFX9-NEXT: s_cselect_b32 s3, s7, s5 -; GFX9-NEXT: s_xor_b32 s3, s3, s4 -; GFX9-NEXT: s_sub_i32 s3, s3, s4 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX9-NEXT: s_ashr_i32 s7, s4, 31 +; GFX9-NEXT: s_add_i32 s4, s4, s7 +; GFX9-NEXT: v_mul_lo_u32 v3, s10, v0 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX9-NEXT: s_sub_i32 s10, 0, s6 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX9-NEXT: s_xor_b32 s4, s4, s7 +; GFX9-NEXT: v_mul_lo_u32 v4, s10, v1 +; GFX9-NEXT: s_ashr_i32 s9, s5, 31 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v3 +; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v4 +; GFX9-NEXT: s_add_i32 s5, s5, s9 +; GFX9-NEXT: s_xor_b32 s5, s5, s9 +; GFX9-NEXT: v_mul_lo_u32 
v4, v0, s0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v4, s4, v4 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s0, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s0, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s0, v3 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s6 +; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX9-NEXT: v_sub_u32_e32 v3, s5, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: s_xor_b32 s1, s7, s1 +; GFX9-NEXT: s_xor_b32 s0, s9, s8 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, s1, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s0, v1 +; GFX9-NEXT: v_subrev_u32_e32 v0, s1, v0 +; GFX9-NEXT: v_subrev_u32_e32 v1, s0, v1 +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm %shl.y = shl <2 x i32> , %y %r = sdiv <2 x i32> %x, %shl.y @@ -6797,20 +6699,18 @@ define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX6-NEXT: v_readfirstlane_b32 s7, v0 -; GFX6-NEXT: s_mul_i32 s7, s7, s4 -; GFX6-NEXT: s_sub_i32 s6, s6, s7 -; GFX6-NEXT: s_sub_i32 s7, s6, s4 -; GFX6-NEXT: s_cmp_ge_u32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s6, s7, s6 -; GFX6-NEXT: s_sub_i32 s7, s6, s4 -; GFX6-NEXT: s_cmp_ge_u32 s6, s4 -; GFX6-NEXT: s_cselect_b32 s4, s7, s6 -; GFX6-NEXT: s_xor_b32 s4, s4, s5 -; GFX6-NEXT: s_sub_i32 s4, s4, s5 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s5, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s5, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -7003,61 +6903,57 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: s_lshl_b32 s2, 0x1000, s6 ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 ; GFX6-NEXT: s_add_i32 s2, s2, s3 -; GFX6-NEXT: s_xor_b32 s2, s2, s3 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX6-NEXT: s_sub_i32 s3, 0, s2 -; GFX6-NEXT: s_ashr_i32 s6, s4, 31 +; GFX6-NEXT: s_xor_b32 s6, s2, s3 +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX6-NEXT: s_lshl_b32 s7, 0x1000, s7 +; GFX6-NEXT: s_ashr_i32 s8, s7, 31 +; GFX6-NEXT: s_add_i32 s7, s7, s8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_xor_b32 s7, s7, s8 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX6-NEXT: s_sub_i32 s9, 0, s6 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 -; GFX6-NEXT: s_add_i32 s3, s4, s6 -; GFX6-NEXT: s_xor_b32 s3, s3, s6 -; GFX6-NEXT: s_lshl_b32 s4, 0x1000, s7 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, 
v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0 -; GFX6-NEXT: v_readfirstlane_b32 s7, v0 -; GFX6-NEXT: s_mul_i32 s7, s7, s2 -; GFX6-NEXT: s_sub_i32 s3, s3, s7 -; GFX6-NEXT: s_sub_i32 s7, s3, s2 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s3, s7, s3 -; GFX6-NEXT: s_sub_i32 s7, s3, s2 -; GFX6-NEXT: s_cmp_ge_u32 s3, s2 -; GFX6-NEXT: s_cselect_b32 s7, s7, s3 -; GFX6-NEXT: s_ashr_i32 s2, s4, 31 -; GFX6-NEXT: s_add_i32 s4, s4, s2 -; GFX6-NEXT: s_xor_b32 s4, s4, s2 -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s4 -; GFX6-NEXT: s_sub_i32 s2, 0, s4 -; GFX6-NEXT: s_ashr_i32 s8, s5, 31 -; GFX6-NEXT: s_xor_b32 s7, s7, s6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: s_sub_i32 s6, s7, s6 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: s_ashr_i32 s8, s4, 31 +; GFX6-NEXT: s_add_i32 s4, s4, s8 +; GFX6-NEXT: v_mul_lo_u32 v2, s9, v0 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX6-NEXT: s_xor_b32 s4, s4, s8 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: s_sub_i32 s9, 0, s7 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: s_add_i32 s2, s5, s8 -; GFX6-NEXT: s_xor_b32 s5, s2, s8 ; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s7, v0 -; GFX6-NEXT: s_mul_i32 s7, s7, s4 -; GFX6-NEXT: s_sub_i32 s5, s5, s7 -; GFX6-NEXT: s_sub_i32 s7, s5, s4 -; GFX6-NEXT: s_cmp_ge_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s5, s7, s5 -; GFX6-NEXT: s_sub_i32 s7, s5, s4 -; GFX6-NEXT: s_cmp_ge_u32 s5, s4 -; GFX6-NEXT: s_cselect_b32 s4, s7, s5 -; GFX6-NEXT: s_xor_b32 s4, s4, s8 -; GFX6-NEXT: s_sub_i32 s4, s4, s8 -; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: v_mov_b32_e32 v1, s4 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_mul_lo_u32 v2, s9, v1 +; GFX6-NEXT: s_ashr_i32 s9, s5, 31 +; GFX6-NEXT: s_add_i32 s5, s5, s9 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s6 +; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: s_xor_b32 s4, s5, s9 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s7 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_xor_b32_e32 v1, s9, v1 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s9, v1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -7183,9 +7079,9 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_mul_lo_u32 v4, v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, s5 -; GFX6-NEXT: 
v_add_i32_e32 v2, vcc, v4, v2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_mul_lo_u32 v4, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v5, v0, v3 ; GFX6-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -7237,13 +7133,12 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], s8, v4 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 1, v0 +; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GFX6-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 2, v0 +; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0 ; GFX6-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX6-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GFX6-NEXT: v_mov_b32_e32 v6, s3 ; GFX6-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc ; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s2, v2 @@ -7253,8 +7148,9 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; @@ -7360,33 +7256,33 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v3, s9 ; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_add_u32 s6, s3, 2 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] -; GFX9-NEXT: s_add_u32 s0, s3, 1 -; GFX9-NEXT: s_addc_u32 s6, s2, 0 -; GFX9-NEXT: s_add_u32 s1, s3, 2 -; GFX9-NEXT: s_addc_u32 s9, s2, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-NEXT: s_addc_u32 s0, s2, 0 +; GFX9-NEXT: s_add_u32 s9, s3, 1 +; GFX9-NEXT: s_addc_u32 s1, s2, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s8 -; GFX9-NEXT: s_cmpk_gt_u32 s0, 0x11e -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s7, s7, s8 +; GFX9-NEXT: s_cmpk_gt_u32 s7, 0x11e +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 +; GFX9-NEXT: s_cselect_b32 s8, -1, 0 ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 -; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x11f +; GFX9-NEXT: s_cmpk_eq_i32 s7, 0x11f +; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v4, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s8 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GFX9-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: 
v_mov_b32_e32 v0, s9 +; GFX9-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %r = udiv i64 %x, 1235195949943 @@ -7535,7 +7431,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: s_movk_i32 s6, 0xf001 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd -; GFX6-NEXT: s_movk_i32 s8, 0xfff +; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 @@ -7543,8 +7439,8 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 12 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_lshr_b64 s[8:9], s[0:1], 12 +; GFX6-NEXT: s_movk_i32 s0, 0xfff ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s6 ; GFX6-NEXT: v_mul_lo_u32 v4, v1, s6 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, s6 @@ -7602,159 +7498,143 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, v1, s8 -; GFX6-NEXT: v_mul_hi_u32 v5, v0, s8 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_mul_lo_u32 v8, v0, s8 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, s0 +; GFX6-NEXT: v_mul_hi_u32 v5, v0, s0 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 2, v0 +; GFX6-NEXT: v_mul_lo_u32 v8, v0, s0 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v6, vcc, 2, v0 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v0 ; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v5, s3 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s2, v8 ; GFX6-NEXT: v_subb_u32_e32 v4, vcc, v5, v4, vcc -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s8, v8 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s0, v8 ; GFX6-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v4, vcc -; GFX6-NEXT: s_movk_i32 s2, 0xffe -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s2, v5 +; GFX6-NEXT: s_movk_i32 s0, 0xffe +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s0, v5 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 ; GFX6-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; GFX6-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v8 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s2, v8 -; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, -1, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e64 v4, -1, v5, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v0, v1, 
s[0:1] +; GFX6-NEXT: v_mov_b32_e32 v0, s8 +; GFX6-NEXT: v_mov_b32_e32 v1, s9 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: udiv_v2i64_mixed_pow2k_denom: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, 0x457ff000 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX9-NEXT: v_mac_f32_e32 v0, 0, v1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000 +; GFX9-NEXT: v_madak_f32 v0, 0, v0, 0x457ff000 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: s_movk_i32 s2, 0xf001 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, s2 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s2 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 +; GFX9-NEXT: v_mul_hi_u32 v5, v0, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, v2 +; GFX9-NEXT: v_mul_lo_u32 v6, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v8, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v8, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_mul_hi_u32 v2, v0, s2 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s2 +; GFX9-NEXT: v_mul_lo_u32 v5, v0, s2 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v8, v1, v2 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v6, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v7, vcc +; GFX9-NEXT: v_mul_lo_u32 v7, v1, v5 +; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 +; GFX9-NEXT: s_movk_i32 s0, 0xfff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 12 -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: s_mul_hi_u32 s5, s4, 0xfffff001 -; GFX9-NEXT: v_readfirstlane_b32 s8, v1 -; GFX9-NEXT: s_sub_i32 s5, s5, s4 -; GFX9-NEXT: s_mul_i32 s9, s8, 0xfffff001 -; GFX9-NEXT: s_add_i32 s5, s5, s9 -; GFX9-NEXT: s_mul_i32 s11, s4, 0xfffff001 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s5 -; GFX9-NEXT: s_mul_i32 s10, s4, s5 -; GFX9-NEXT: s_mul_hi_u32 s4, s4, s11 -; GFX9-NEXT: s_add_u32 s4, s4, s10 -; GFX9-NEXT: s_addc_u32 s9, 0, s9 -; GFX9-NEXT: s_mul_hi_u32 s12, s8, s11 -; GFX9-NEXT: s_mul_i32 s11, s8, s11 -; GFX9-NEXT: s_add_u32 s4, s4, s11 -; GFX9-NEXT: s_mul_hi_u32 s10, s8, s5 -; GFX9-NEXT: s_addc_u32 s4, s9, s12 -; GFX9-NEXT: s_addc_u32 s9, s10, 0 -; GFX9-NEXT: s_mul_i32 s5, s8, s5 -; GFX9-NEXT: s_add_u32 s4, s4, s5 -; GFX9-NEXT: s_addc_u32 s5, 0, s9 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s4, s8, s5 -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_hi_u32 s9, 
s8, 0xfffff001 -; GFX9-NEXT: s_mul_i32 s5, s4, 0xfffff001 -; GFX9-NEXT: s_sub_i32 s9, s9, s8 -; GFX9-NEXT: s_add_i32 s9, s9, s5 -; GFX9-NEXT: s_mul_i32 s11, s8, 0xfffff001 -; GFX9-NEXT: s_mul_hi_u32 s5, s8, s9 -; GFX9-NEXT: s_mul_i32 s10, s8, s9 -; GFX9-NEXT: s_mul_hi_u32 s8, s8, s11 -; GFX9-NEXT: s_add_u32 s8, s8, s10 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 -; GFX9-NEXT: s_mul_hi_u32 s12, s4, s11 -; GFX9-NEXT: s_mul_i32 s11, s4, s11 -; GFX9-NEXT: s_add_u32 s8, s8, s11 -; GFX9-NEXT: s_mul_hi_u32 s10, s4, s9 -; GFX9-NEXT: s_addc_u32 s5, s5, s12 -; GFX9-NEXT: s_addc_u32 s8, s10, 0 -; GFX9-NEXT: s_mul_i32 s9, s4, s9 -; GFX9-NEXT: s_add_u32 s5, s5, s9 -; GFX9-NEXT: s_addc_u32 s8, 0, s8 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s5, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s4, s4, s8 -; GFX9-NEXT: v_readfirstlane_b32 s9, v0 -; GFX9-NEXT: s_mul_i32 s8, s6, s4 -; GFX9-NEXT: s_mul_hi_u32 s10, s6, s9 -; GFX9-NEXT: s_mul_hi_u32 s5, s6, s4 -; GFX9-NEXT: s_add_u32 s8, s10, s8 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 -; GFX9-NEXT: s_mul_hi_u32 s11, s7, s9 -; GFX9-NEXT: s_mul_i32 s9, s7, s9 -; GFX9-NEXT: s_add_u32 s8, s8, s9 -; GFX9-NEXT: s_mul_hi_u32 s10, s7, s4 -; GFX9-NEXT: s_addc_u32 s5, s5, s11 -; GFX9-NEXT: s_addc_u32 s8, s10, 0 -; GFX9-NEXT: s_mul_i32 s4, s7, s4 -; GFX9-NEXT: s_add_u32 s4, s5, s4 -; GFX9-NEXT: s_addc_u32 s5, 0, s8 -; GFX9-NEXT: s_add_u32 s8, s4, 1 -; GFX9-NEXT: s_addc_u32 s9, s5, 0 -; GFX9-NEXT: s_add_u32 s10, s4, 2 -; GFX9-NEXT: s_mul_i32 s13, s5, 0xfff -; GFX9-NEXT: s_mul_hi_u32 s14, s4, 0xfff -; GFX9-NEXT: s_addc_u32 s11, s5, 0 -; GFX9-NEXT: s_add_i32 s14, s14, s13 -; GFX9-NEXT: s_mul_i32 s13, s4, 0xfff -; GFX9-NEXT: v_mov_b32_e32 v0, s13 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s6, v0 -; GFX9-NEXT: s_movk_i32 s12, 0xfff -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s6, s7, s14 -; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s12, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s7, s6, 0 -; GFX9-NEXT: s_movk_i32 s12, 0xffe -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s12, v1 -; GFX9-NEXT: s_cmp_eq_u32 s7, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, s9 -; GFX9-NEXT: v_mov_b32_e32 v3, s11 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s12, v0 -; GFX9-NEXT: s_cmp_eq_u32 s6, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 12 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v8, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, s6, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s6, v0 +; GFX9-NEXT: v_mul_hi_u32 v5, s6, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, s7, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, s7, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, s7, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 +; GFX9-NEXT: 
v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, s0 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s0 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s6, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s0, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v5, vcc +; GFX9-NEXT: s_movk_i32 s0, 0xffe +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm %r = udiv <2 x i64> %x, @@ -7926,19 +7806,19 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GFX6-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GFX6-NEXT: v_mov_b32_e32 v4, s7 -; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s5, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 ; GFX6-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; GFX6-NEXT: v_mov_b32_e32 v5, s7 +; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s5, v1 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX6-NEXT: s_endpgm ; @@ -8034,40 +7914,40 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: s_mov_b32 s8, 0x9761f7c9 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 ; GFX9-NEXT: s_subb_u32 s6, s1, 0x11f -; GFX9-NEXT: v_subrev_co_u32_e64 v1, s[0:1], s8, v0 +; GFX9-NEXT: v_subrev_co_u32_e64 v3, s[0:1], s8, v0 ; 
GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 ; GFX9-NEXT: s_subb_u32 s10, s6, 0 ; GFX9-NEXT: s_cmpk_gt_u32 s10, 0x11e ; GFX9-NEXT: s_cselect_b32 s11, -1, 0 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[2:3], s12, v1 +; GFX9-NEXT: v_cmp_lt_u32_e64 s[2:3], s12, v3 ; GFX9-NEXT: s_cmpk_eq_i32 s10, 0x11f -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] ; GFX9-NEXT: v_mov_b32_e32 v4, s11 ; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0 ; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[2:3] ; GFX9-NEXT: s_subb_u32 s2, s6, 0x11f -; GFX9-NEXT: v_subrev_co_u32_e64 v4, s[0:1], s8, v1 +; GFX9-NEXT: v_subrev_co_u32_e64 v4, s[0:1], s8, v3 ; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX9-NEXT: s_subb_u32 s2, s2, 0 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v1, v4, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NEXT: s_subb_u32 s0, s2, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s9 -; GFX9-NEXT: s_cmpk_gt_u32 s0, 0x11e -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s2, s7, s9 +; GFX9-NEXT: s_cmpk_gt_u32 s2, 0x11e +; GFX9-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-NEXT: v_mov_b32_e32 v6, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 +; GFX9-NEXT: s_cselect_b32 s3, -1, 0 ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s12, v0 -; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x11f -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: s_cmpk_eq_i32 s2, 0x11f +; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX9-NEXT: v_mov_b32_e32 v6, s3 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm @@ -8277,9 +8157,9 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_mov_b32_e32 v0, 0x4f800000 ; GFX6-NEXT: v_madak_f32 v0, 0, v0, 0x4996c7d8 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0xffed2705 -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s5, 0xffed2705 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 @@ -8287,15 +8167,15 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s0, s4 -; GFX6-NEXT: s_mov_b32 s4, 0x12d8fb -; GFX6-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s8 -; GFX6-NEXT: v_mul_lo_u32 v4, v0, s8 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: s_mov_b32 s1, s5 +; GFX6-NEXT: s_ashr_i32 s8, s3, 31 +; GFX6-NEXT: s_add_u32 s2, s2, s8 +; GFX6-NEXT: v_mul_lo_u32 v2, v1, s5 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s5 +; GFX6-NEXT: 
v_mul_lo_u32 v4, v0, s5 +; GFX6-NEXT: s_mov_b32 s9, s8 +; GFX6-NEXT: s_addc_u32 s3, s3, s8 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -8305,6 +8185,8 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 ; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] +; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc @@ -8312,10 +8194,12 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GFX6-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s8 +; GFX6-NEXT: v_mul_lo_u32 v2, v1, s5 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s5 +; GFX6-NEXT: s_mov_b32 s0, 0x12d8fb +; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX6-NEXT: v_mul_lo_u32 v3, v0, s8 +; GFX6-NEXT: v_mul_lo_u32 v3, v0, s5 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, v0, v3 @@ -8330,197 +8214,177 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v7, v5, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX6-NEXT: s_ashr_i32 s8, s7, 31 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GFX6-NEXT: s_add_u32 s6, s6, s8 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: s_mov_b32 s9, s8 -; GFX6-NEXT: s_addc_u32 s7, s7, s8 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9] -; GFX6-NEXT: v_mul_lo_u32 v2, s6, v1 -; GFX6-NEXT: v_mul_hi_u32 v3, s6, v0 -; GFX6-NEXT: v_mul_hi_u32 v4, s6, v1 -; GFX6-NEXT: v_mul_hi_u32 v5, s7, v1 -; GFX6-NEXT: v_mul_lo_u32 v1, s7, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, s2, v1 +; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0 +; GFX6-NEXT: v_mul_hi_u32 v4, s2, v1 +; GFX6-NEXT: v_mul_hi_u32 v5, s3, v1 +; GFX6-NEXT: v_mul_lo_u32 v1, s3, v1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, s7, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0 +; GFX6-NEXT: v_mul_lo_u32 v4, s3, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0 +; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, v1, s4 -; GFX6-NEXT: v_mul_hi_u32 v5, v0, s4 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_mul_lo_u32 v8, v0, s4 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, s0 +; GFX6-NEXT: v_mul_hi_u32 v5, v0, s0 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 2, v0 +; GFX6-NEXT: v_mul_lo_u32 v8, v0, s0 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v6, vcc, 2, v0 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v0 ; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX6-NEXT: v_mov_b32_e32 v5, s7 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s6, 
v8 +; GFX6-NEXT: v_mov_b32_e32 v5, s3 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s2, v8 ; GFX6-NEXT: v_subb_u32_e32 v4, vcc, v5, v4, vcc -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v8 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s0, v8 ; GFX6-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v4, vcc -; GFX6-NEXT: s_mov_b32 s4, 0x12d8fa -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5 +; GFX6-NEXT: s_mov_b32 s0, 0x12d8fa +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s0, v5 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 ; GFX6-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; GFX6-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v8 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s4, v8 -; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, -1, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e64 v4, -1, v5, s[0:1] +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 ; GFX6-NEXT: v_xor_b32_e32 v1, s8, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, s8 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc -; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_i64_oddk_denom: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4996c7d8 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX9-NEXT: v_mac_f32_e32 v0, 0, v1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000 +; GFX9-NEXT: v_madak_f32 v0, 0, v0, 0x4996c7d8 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_mov_b32 s2, 0xffed2705 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: v_readfirstlane_b32 s5, v0 -; GFX9-NEXT: s_mul_hi_u32 s6, s5, 0xffed2705 -; GFX9-NEXT: s_mul_i32 s7, s4, 0xffed2705 -; GFX9-NEXT: s_add_i32 s6, s6, s7 -; GFX9-NEXT: s_sub_i32 s6, s6, s5 -; GFX9-NEXT: s_mul_i32 s9, s5, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s7, s5, s6 -; GFX9-NEXT: s_mul_i32 s8, s5, s6 -; GFX9-NEXT: s_mul_hi_u32 s5, s5, s9 -; GFX9-NEXT: s_add_u32 s5, s5, s8 -; GFX9-NEXT: s_addc_u32 s7, 0, s7 -; GFX9-NEXT: s_mul_hi_u32 s10, s4, s9 -; GFX9-NEXT: s_mul_i32 s9, s4, s9 -; GFX9-NEXT: s_add_u32 s5, s5, s9 -; GFX9-NEXT: s_mul_hi_u32 s8, s4, s6 -; GFX9-NEXT: s_addc_u32 s5, s7, s10 -; GFX9-NEXT: s_addc_u32 s7, s8, 0 -; GFX9-NEXT: s_mul_i32 s6, s4, s6 -; GFX9-NEXT: s_add_u32 s5, s5, s6 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s5, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s4, s4, s6 -; GFX9-NEXT: v_readfirstlane_b32 s6, v0 -; GFX9-NEXT: s_mul_i32 s5, s4, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s7, s6, 
0xffed2705 -; GFX9-NEXT: s_add_i32 s7, s7, s5 -; GFX9-NEXT: s_sub_i32 s5, s7, s6 -; GFX9-NEXT: s_mul_i32 s8, s6, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s11, s6, s5 -; GFX9-NEXT: s_mul_i32 s12, s6, s5 -; GFX9-NEXT: s_mul_hi_u32 s6, s6, s8 -; GFX9-NEXT: s_add_u32 s6, s6, s12 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s8 -; GFX9-NEXT: s_mul_i32 s10, s4, s8 -; GFX9-NEXT: s_addc_u32 s8, 0, s11 -; GFX9-NEXT: s_add_u32 s6, s6, s10 -; GFX9-NEXT: s_mul_hi_u32 s7, s4, s5 -; GFX9-NEXT: s_addc_u32 s6, s8, s9 -; GFX9-NEXT: s_addc_u32 s7, s7, 0 -; GFX9-NEXT: s_mul_i32 s5, s4, s5 -; GFX9-NEXT: s_add_u32 s5, s6, s5 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s5, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s6, s4, s6 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s2 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, s2 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s2 +; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, v4 +; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, v2 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, v4 +; GFX9-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX9-NEXT: v_mul_hi_u32 v8, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v7, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v8, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s2 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, s2 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s4, s3, 31 -; GFX9-NEXT: s_add_u32 s2, s2, s4 -; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_addc_u32 s3, s3, s4 -; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s7, s2, s6 -; GFX9-NEXT: s_mul_hi_u32 s9, s2, s8 -; GFX9-NEXT: s_mul_hi_u32 s5, s2, s6 -; GFX9-NEXT: s_add_u32 s7, s9, s7 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 -; GFX9-NEXT: s_mul_hi_u32 s10, s3, s8 -; GFX9-NEXT: s_mul_i32 s8, s3, s8 -; GFX9-NEXT: s_add_u32 s7, s7, s8 -; GFX9-NEXT: s_mul_hi_u32 s9, s3, s6 -; GFX9-NEXT: s_addc_u32 s5, s5, s10 -; GFX9-NEXT: s_addc_u32 s7, s9, 0 -; GFX9-NEXT: s_mul_i32 s6, s3, s6 -; GFX9-NEXT: s_add_u32 s5, s5, s6 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: s_add_u32 s7, s5, 1 -; GFX9-NEXT: s_addc_u32 s8, s6, 0 -; GFX9-NEXT: s_add_u32 s9, s5, 2 -; GFX9-NEXT: s_mul_i32 s12, s6, 0x12d8fb -; GFX9-NEXT: s_mul_hi_u32 s13, s5, 0x12d8fb -; GFX9-NEXT: s_addc_u32 s10, s6, 0 -; GFX9-NEXT: s_add_i32 s13, s13, s12 -; GFX9-NEXT: s_mul_i32 s12, s5, 0x12d8fb -; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s2, v0 -; GFX9-NEXT: s_mov_b32 s11, 0x12d8fb -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s2, s3, s13 -; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s11, v0 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s3, s2, 0 -; GFX9-NEXT: s_mov_b32 s11, 0x12d8fa -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s11, v1 -; GFX9-NEXT: s_cmp_eq_u32 s3, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 
v3, v4, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s11, v0 -; GFX9-NEXT: s_cmp_eq_u32 s2, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s6 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_xor_b32_e32 v3, s4, v0 -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v1 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v4, vcc -; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9-NEXT: s_endpgm - %r = sdiv i64 %x, 1235195 - store i64 %r, i64 addrspace(1)* %out - ret void -} - +; GFX9-NEXT: s_ashr_i32 s2, s7, 31 +; GFX9-NEXT: s_add_u32 s0, s6, s2 +; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v8, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v1, v4 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v7, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: s_mov_b32 s3, s2 +; GFX9-NEXT: s_addc_u32 s1, s7, s2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 +; GFX9-NEXT: v_mul_hi_u32 v5, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, s1, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX9-NEXT: s_mov_b32 s3, 0x12d8fb +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, s3 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s3 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s3 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, s1 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s0, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s3, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v5, vcc +; GFX9-NEXT: s_mov_b32 s0, 0x12d8fa +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: 
v_cmp_eq_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v6, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] +; GFX9-NEXT: s_endpgm + %r = sdiv i64 %x, 1235195 + store i64 %r, i64 addrspace(1)* %out + ret void +} + define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) { ; CHECK-LABEL: @sdiv_i64_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i64 [[X:%.*]], 4096 @@ -8629,9 +8493,9 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mul_hi_u32 v3, s4, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s5, v0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_mul_lo_u32 v3, s4, v0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, v0, v3 ; GFX6-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -8681,13 +8545,12 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v4 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 1, v0 +; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GFX6-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 2, v0 +; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0 ; GFX6-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX6-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GFX6-NEXT: v_mov_b32_e32 v6, s3 ; GFX6-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v2 @@ -8697,9 +8560,10 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s11, v2 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: s_xor_b64 s[0:1], s[12:13], s[8:9] -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, s0, v0 ; GFX6-NEXT: v_xor_b32_e32 v1, s1, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, s1 @@ -8824,31 +8688,31 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v3, s15 ; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_add_u32 s6, s12, 2 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] -; GFX9-NEXT: s_add_u32 s0, s12, 1 -; GFX9-NEXT: s_addc_u32 s6, s13, 0 -; GFX9-NEXT: s_add_u32 s1, s12, 2 -; GFX9-NEXT: s_addc_u32 s15, s13, 0 -; GFX9-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[0:1] -; GFX9-NEXT: 
v_mov_b32_e32 v3, s6 -; GFX9-NEXT: v_mov_b32_e32 v4, s15 +; GFX9-NEXT: s_addc_u32 s0, s13, 0 +; GFX9-NEXT: s_add_u32 s15, s12, 1 +; GFX9-NEXT: s_addc_u32 s1, s13, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s14 -; GFX9-NEXT: s_cmp_ge_u32 s0, s9 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s7, s7, s14 +; GFX9-NEXT: s_cmp_ge_u32 s7, s9 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 +; GFX9-NEXT: s_cselect_b32 s14, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 -; GFX9-NEXT: s_cmp_eq_u32 s0, s9 +; GFX9-NEXT: s_cmp_eq_u32 s7, s9 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s14 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s13 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_mov_b32_e32 v3, s13 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s15 +; GFX9-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v3, s12 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc ; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[2:3] @@ -8961,10 +8825,10 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_mul_lo_u32 v3, v1, s6 ; GFX6-NEXT: s_add_u32 s0, s0, s8 ; GFX6-NEXT: s_addc_u32 s1, s1, 0 -; GFX6-NEXT: s_ashr_i64 s[0:1], s[0:1], 12 +; GFX6-NEXT: s_ashr_i64 s[8:9], s[0:1], 12 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, s6 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v5, v0, v3 ; GFX6-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -8974,8 +8838,8 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GFX6-NEXT: v_mul_lo_u32 v6, v1, v3 ; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX6-NEXT: s_ashr_i32 s8, s3, 31 -; GFX6-NEXT: s_add_u32 s2, s2, s8 +; GFX6-NEXT: s_ashr_i32 s10, s3, 31 +; GFX6-NEXT: s_add_u32 s0, s2, s10 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc @@ -8985,9 +8849,9 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; GFX6-NEXT: v_mul_lo_u32 v2, v1, s6 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, s6 -; GFX6-NEXT: s_mov_b32 s9, s8 -; GFX6-NEXT: s_addc_u32 s3, s3, s8 -; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] +; GFX6-NEXT: s_mov_b32 s11, s10 +; GFX6-NEXT: s_addc_u32 s1, s3, s10 +; GFX6-NEXT: s_xor_b64 s[0:1], s[0:1], s[10:11] ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, s6 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 @@ -9007,195 +8871,180 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GFX6-NEXT: v_mul_lo_u32 v2, s2, v1 -; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v4, s2, v1 -; GFX6-NEXT: v_mul_hi_u32 v5, s3, v1 -; GFX6-NEXT: 
v_mul_lo_u32 v1, s3, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX6-NEXT: v_mul_hi_u32 v3, s0, v0 +; GFX6-NEXT: v_mul_hi_u32 v4, s0, v1 +; GFX6-NEXT: v_mul_hi_u32 v5, s1, v1 +; GFX6-NEXT: v_mul_lo_u32 v1, s1, v1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, s3, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0 -; GFX6-NEXT: s_movk_i32 s9, 0xfff +; GFX6-NEXT: v_mul_lo_u32 v4, s1, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX6-NEXT: s_movk_i32 s2, 0xfff ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, v1, s9 -; GFX6-NEXT: v_mul_hi_u32 v5, v0, s9 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_mul_lo_u32 v8, v0, s9 +; GFX6-NEXT: v_mul_lo_u32 v4, v1, s2 +; GFX6-NEXT: v_mul_hi_u32 v5, v0, s2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 2, v0 +; GFX6-NEXT: v_mul_lo_u32 v8, v0, s2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GFX6-NEXT: v_add_i32_e32 v6, vcc, 2, v0 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, 1, v0 ; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX6-NEXT: v_mov_b32_e32 v5, s3 -; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s2, v8 +; GFX6-NEXT: v_mov_b32_e32 v5, s1 +; GFX6-NEXT: v_sub_i32_e32 v8, vcc, s0, v8 ; GFX6-NEXT: v_subb_u32_e32 v4, vcc, v5, v4, vcc -; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s9, v8 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s2, v8 ; GFX6-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v4, vcc -; GFX6-NEXT: s_movk_i32 s2, 0xffe -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s2, v5 +; GFX6-NEXT: s_movk_i32 s0, 0xffe +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s0, v5 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 ; GFX6-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; GFX6-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v8 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s2, v8 -; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, -1, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 -; GFX6-NEXT: v_xor_b32_e32 v1, s8, v1 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e64 v4, -1, v5, s[0:1] +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX6-NEXT: v_xor_b32_e32 v0, s10, v0 +; GFX6-NEXT: v_xor_b32_e32 v1, s10, v1 +; GFX6-NEXT: v_mov_b32_e32 v3, s10 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s10, v0 ; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s8 +; GFX6-NEXT: v_mov_b32_e32 v1, s9 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: ssdiv_v2i64_mixed_pow2k_denom: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v1, 
0x457ff000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX9-NEXT: v_mac_f32_e32 v1, 0, v2 -; GFX9-NEXT: v_rcp_f32_e32 v1, v1 -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x457ff000 +; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX9-NEXT: v_mac_f32_e32 v0, 0, v1 +; GFX9-NEXT: v_rcp_f32_e32 v0, v0 +; GFX9-NEXT: s_movk_i32 s8, 0xf001 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 -; GFX9-NEXT: v_trunc_f32_e32 v2, v2 -; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 +; GFX9-NEXT: v_trunc_f32_e32 v1, v1 +; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s0, s5, 31 ; GFX9-NEXT: s_lshr_b32 s0, s0, 20 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s8 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 ; GFX9-NEXT: s_add_u32 s0, s4, s0 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: v_readfirstlane_b32 s8, v2 ; GFX9-NEXT: s_addc_u32 s1, s5, 0 -; GFX9-NEXT: s_mul_hi_u32 s5, s4, 0xfffff001 -; GFX9-NEXT: s_mul_i32 s9, s8, 0xfffff001 -; GFX9-NEXT: s_add_i32 s5, s5, s9 -; GFX9-NEXT: s_sub_i32 s5, s5, s4 -; GFX9-NEXT: s_mul_i32 s11, s4, 0xfffff001 -; GFX9-NEXT: s_ashr_i64 s[0:1], s[0:1], 12 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s5 -; GFX9-NEXT: s_mul_i32 s10, s4, s5 -; GFX9-NEXT: s_mul_hi_u32 s4, s4, s11 -; GFX9-NEXT: s_add_u32 s4, s4, s10 -; GFX9-NEXT: s_addc_u32 s9, 0, s9 -; GFX9-NEXT: s_mul_hi_u32 s12, s8, s11 -; GFX9-NEXT: s_mul_i32 s11, s8, s11 -; GFX9-NEXT: s_add_u32 s4, s4, s11 -; GFX9-NEXT: s_mul_hi_u32 s10, s8, s5 -; GFX9-NEXT: s_addc_u32 s4, s9, s12 -; GFX9-NEXT: s_addc_u32 s9, s10, 0 -; GFX9-NEXT: s_mul_i32 s5, s8, s5 -; GFX9-NEXT: s_add_u32 s4, s4, s5 -; GFX9-NEXT: s_addc_u32 s5, 0, s9 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, s4, v1 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s4, s8, s5 -; GFX9-NEXT: v_readfirstlane_b32 s8, v1 -; GFX9-NEXT: s_mul_i32 s5, s4, 0xfffff001 -; GFX9-NEXT: s_mul_hi_u32 s9, s8, 0xfffff001 -; GFX9-NEXT: s_add_i32 s9, s9, s5 -; GFX9-NEXT: s_sub_i32 s5, s9, s8 -; GFX9-NEXT: s_mul_i32 s10, s8, 0xfffff001 -; GFX9-NEXT: s_mul_hi_u32 s13, s8, s5 -; GFX9-NEXT: s_mul_i32 s14, s8, s5 -; GFX9-NEXT: s_mul_hi_u32 s8, s8, s10 -; GFX9-NEXT: s_add_u32 s8, s8, s14 -; GFX9-NEXT: s_mul_hi_u32 s11, s4, s10 -; GFX9-NEXT: s_mul_i32 s12, s4, s10 -; GFX9-NEXT: s_addc_u32 s10, 0, s13 -; GFX9-NEXT: s_add_u32 s8, s8, s12 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s5 -; GFX9-NEXT: s_addc_u32 s8, s10, s11 -; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: s_mul_i32 s5, s4, s5 -; GFX9-NEXT: s_add_u32 s5, s8, s5 -; GFX9-NEXT: s_addc_u32 s8, 0, s9 -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, s5, v1 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s8, s4, s8 -; GFX9-NEXT: s_ashr_i32 s4, s7, 31 -; GFX9-NEXT: s_add_u32 s6, s6, s4 -; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_addc_u32 s7, s7, s4 -; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s10, v1 -; GFX9-NEXT: s_mul_i32 s9, s6, s8 -; GFX9-NEXT: s_mul_hi_u32 s11, s6, s10 -; GFX9-NEXT: s_mul_hi_u32 s5, s6, s8 -; GFX9-NEXT: s_add_u32 s9, s11, s9 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 -; GFX9-NEXT: s_mul_hi_u32 s12, s7, s10 -; 
GFX9-NEXT: s_mul_i32 s10, s7, s10 -; GFX9-NEXT: s_add_u32 s9, s9, s10 -; GFX9-NEXT: s_mul_hi_u32 s11, s7, s8 -; GFX9-NEXT: s_addc_u32 s5, s5, s12 -; GFX9-NEXT: s_addc_u32 s9, s11, 0 -; GFX9-NEXT: s_mul_i32 s8, s7, s8 -; GFX9-NEXT: s_add_u32 s5, s5, s8 -; GFX9-NEXT: s_addc_u32 s8, 0, s9 -; GFX9-NEXT: s_add_u32 s9, s5, 1 -; GFX9-NEXT: s_addc_u32 s10, s8, 0 -; GFX9-NEXT: s_add_u32 s11, s5, 2 -; GFX9-NEXT: s_mul_i32 s14, s8, 0xfff -; GFX9-NEXT: s_mul_hi_u32 s15, s5, 0xfff -; GFX9-NEXT: s_addc_u32 s12, s8, 0 -; GFX9-NEXT: s_add_i32 s15, s15, s14 -; GFX9-NEXT: s_mul_i32 s14, s5, 0xfff -; GFX9-NEXT: v_mov_b32_e32 v1, s14 -; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s6, v1 -; GFX9-NEXT: s_movk_i32 s13, 0xfff -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s6, s7, s15 -; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s13, v1 -; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s7, s6, 0 -; GFX9-NEXT: s_movk_i32 s13, 0xffe -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s13, v2 -; GFX9-NEXT: s_cmp_eq_u32 s7, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-NEXT: v_mov_b32_e32 v4, s11 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-NEXT: v_mov_b32_e32 v4, s12 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s13, v1 -; GFX9-NEXT: s_cmp_eq_u32 s6, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc -; GFX9-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GFX9-NEXT: v_xor_b32_e32 v2, s4, v2 -; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s4, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v4, vcc -; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: global_store_dwordx4 v0, v[1:4], s[2:3] +; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v1, v2 +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v5, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v1, v4 +; GFX9-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX9-NEXT: s_ashr_i64 s[4:5], s[0:1], 12 +; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v4, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v7, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, s8 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, s8 +; GFX9-NEXT: v_mul_lo_u32 v4, v0, s8 +; GFX9-NEXT: s_ashr_i32 s8, s7, 31 +; GFX9-NEXT: s_add_u32 s0, s6, s8 +; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v0 +; GFX9-NEXT: v_mul_lo_u32 v6, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v0, v4 +; GFX9-NEXT: v_mul_hi_u32 v8, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v5, v1, v4 +; GFX9-NEXT: v_mul_lo_u32 v4, v1, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 
v7, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc +; GFX9-NEXT: v_mul_lo_u32 v2, v1, v2 +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v5, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: s_mov_b32 s9, s8 +; GFX9-NEXT: s_addc_u32 s1, s7, s8 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9] +; GFX9-NEXT: v_mul_lo_u32 v2, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, s0, v0 +; GFX9-NEXT: v_mul_hi_u32 v5, s0, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, s1, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v1 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, s1, v0 +; GFX9-NEXT: v_mul_hi_u32 v0, s1, v0 +; GFX9-NEXT: s_movk_i32 s6, 0xfff +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v3, v0, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 2, v0 +; GFX9-NEXT: v_mul_lo_u32 v5, v1, s6 +; GFX9-NEXT: v_mul_hi_u32 v6, v0, s6 +; GFX9-NEXT: v_mul_lo_u32 v9, v0, s6 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 1, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 +; GFX9-NEXT: v_mov_b32_e32 v6, s1 +; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, s0, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s6, v9 +; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v5, vcc +; GFX9-NEXT: s_movk_i32 s0, 0xffe +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v9 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v6, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, s8, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s8, v0 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX9-NEXT: s_endpgm %r = sdiv <2 x i64> %x, store <2 x i64> %r, <2 x i64> addrspace(1)* %out @@ -9250,6 +9099,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mul_hi_u32 v3, s10, v0 ; GFX6-NEXT: v_mul_lo_u32 v5, s11, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s10, v0 +; GFX6-NEXT: s_xor_b64 s[14:15], s[16:17], s[14:15] ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -9310,9 +9160,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mul_hi_u32 v3, s12, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s13, v0 ; GFX6-NEXT: 
v_mov_b32_e32 v5, s13 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_mul_lo_u32 v3, s12, v0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s5, v2 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s4, v3 ; GFX6-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v5, vcc @@ -9324,71 +9174,72 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], s13, v4 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 1, v0 +; GFX6-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GFX6-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 2, v0 +; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0 ; GFX6-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1] -; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GFX6-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[0:1] -; GFX6-NEXT: s_xor_b64 s[0:1], s[16:17], s[14:15] ; GFX6-NEXT: s_ashr_i32 s4, s3, 31 +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 ; GFX6-NEXT: s_add_u32 s2, s2, s4 +; GFX6-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GFX6-NEXT: v_mov_b32_e32 v6, s5 ; GFX6-NEXT: s_mov_b32 s5, s4 ; GFX6-NEXT: s_addc_u32 s3, s3, s4 ; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: v_cvt_f32_u32_e32 v8, s2 +; GFX6-NEXT: v_cvt_f32_u32_e32 v9, s3 ; GFX6-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc -; GFX6-NEXT: v_cvt_f32_u32_e32 v6, s2 -; GFX6-NEXT: v_cvt_f32_u32_e32 v7, s3 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s13, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 -; GFX6-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 -; GFX6-NEXT: v_rcp_f32_e32 v6, v6 ; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s13, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc +; GFX6-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc +; GFX6-NEXT: v_rcp_f32_e32 v3, v8 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v6 -; GFX6-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 -; GFX6-NEXT: v_trunc_f32_e32 v3, v3 -; GFX6-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] +; GFX6-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; GFX6-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; GFX6-NEXT: v_trunc_f32_e32 v4, v4 +; GFX6-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX6-NEXT: s_sub_u32 s12, 0, s2 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX6-NEXT: v_mul_hi_u32 v4, s12, v2 -; GFX6-NEXT: v_mul_lo_u32 v5, s12, v3 -; GFX6-NEXT: s_subb_u32 s13, 0, s3 -; GFX6-NEXT: v_mul_lo_u32 v6, s13, v2 -; GFX6-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX6-NEXT: v_mul_lo_u32 v5, s12, v2 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GFX6-NEXT: v_mul_lo_u32 v6, v2, v4 -; GFX6-NEXT: v_mul_hi_u32 v7, v2, v5 -; GFX6-NEXT: v_mul_hi_u32 v8, v2, v4 -; GFX6-NEXT: v_mul_hi_u32 v9, v3, v4 -; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4 +; GFX6-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GFX6-NEXT: s_sub_u32 s0, 0, s2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_mul_hi_u32 v2, s0, v3 +; GFX6-NEXT: 
v_mul_lo_u32 v5, s0, v4 +; GFX6-NEXT: s_subb_u32 s1, 0, s3 +; GFX6-NEXT: v_mul_lo_u32 v6, s1, v3 +; GFX6-NEXT: s_ashr_i32 s12, s7, 31 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GFX6-NEXT: v_mul_lo_u32 v6, v3, v2 +; GFX6-NEXT: v_mul_hi_u32 v7, v3, v5 +; GFX6-NEXT: v_mul_hi_u32 v8, v3, v2 +; GFX6-NEXT: v_mul_hi_u32 v9, v4, v2 +; GFX6-NEXT: v_mul_lo_u32 v2, v4, v2 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; GFX6-NEXT: v_mul_lo_u32 v8, v3, v5 -; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5 -; GFX6-NEXT: v_xor_b32_e32 v1, s1, v1 +; GFX6-NEXT: v_mul_lo_u32 v8, v4, v5 +; GFX6-NEXT: v_mul_hi_u32 v5, v4, v5 +; GFX6-NEXT: s_mov_b32 s13, s12 +; GFX6-NEXT: v_xor_b32_e32 v0, s14, v0 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, s12, v3 -; GFX6-NEXT: v_mul_hi_u32 v5, s12, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, s13, v2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v4, v5, vcc +; GFX6-NEXT: v_mul_lo_u32 v4, s0, v3 +; GFX6-NEXT: v_mul_hi_u32 v5, s0, v2 +; GFX6-NEXT: v_mul_lo_u32 v6, s1, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, s15, v1 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX6-NEXT: v_mul_lo_u32 v5, s12, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v2 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; GFX6-NEXT: v_mul_lo_u32 v8, v2, v4 ; GFX6-NEXT: v_mul_hi_u32 v9, v2, v5 @@ -9403,14 +9254,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v9, v7, vcc ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GFX6-NEXT: s_ashr_i32 s12, s7, 31 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc -; GFX6-NEXT: s_add_u32 s6, s6, s12 +; GFX6-NEXT: s_add_u32 s0, s6, s12 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GFX6-NEXT: s_mov_b32 s13, s12 -; GFX6-NEXT: s_addc_u32 s7, s7, s12 +; GFX6-NEXT: s_addc_u32 s1, s7, s12 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc -; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[12:13] +; GFX6-NEXT: s_xor_b64 s[6:7], s[0:1], s[12:13] ; GFX6-NEXT: v_mul_lo_u32 v4, s6, v3 ; GFX6-NEXT: v_mul_hi_u32 v5, s6, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, s6, v3 @@ -9420,7 +9269,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; GFX6-NEXT: v_mul_lo_u32 v7, s7, v2 ; GFX6-NEXT: v_mul_hi_u32 v2, s7, v2 -; GFX6-NEXT: v_mov_b32_e32 v6, s1 +; GFX6-NEXT: v_mov_b32_e32 v6, s15 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v7 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v8, vcc @@ -9428,12 +9277,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_mul_lo_u32 v4, s2, v3 ; GFX6-NEXT: v_mul_hi_u32 v5, s2, v2 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s14, v0 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc ; GFX6-NEXT: v_mul_lo_u32 v6, s3, v2 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GFX6-NEXT: v_mul_lo_u32 v5, s2, 
v2 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, s7, v4 ; GFX6-NEXT: v_mov_b32_e32 v7, s3 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, s6, v5 @@ -9446,13 +9295,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, v8, v7, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 1, v2 +; GFX6-NEXT: v_add_i32_e64 v7, s[0:1], 2, v2 ; GFX6-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v3, s[0:1] -; GFX6-NEXT: v_add_i32_e64 v9, s[0:1], 2, v2 +; GFX6-NEXT: v_add_i32_e64 v9, s[0:1], 1, v2 ; GFX6-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v6, v7, v9, s[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v7, v8, v10, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v6, v10, v8, s[0:1] ; GFX6-NEXT: v_mov_b32_e32 v8, s7 ; GFX6-NEXT: v_subb_u32_e32 v4, vcc, v8, v4, vcc ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s3, v4 @@ -9462,9 +9310,10 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s3, v4 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v4, v9, v7, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: s_xor_b64 s[0:1], s[12:13], s[4:5] -; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc ; GFX6-NEXT: v_xor_b32_e32 v2, s0, v2 ; GFX6-NEXT: v_xor_b32_e32 v3, s1, v3 ; GFX6-NEXT: v_mov_b32_e32 v4, s1 @@ -9590,36 +9439,36 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v2, s19 ; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_add_u32 s4, s16, 2 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[0:1] -; GFX9-NEXT: s_add_u32 s0, s16, 1 -; GFX9-NEXT: s_addc_u32 s4, s17, 0 -; GFX9-NEXT: s_add_u32 s1, s16, 2 -; GFX9-NEXT: s_addc_u32 s19, s17, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: s_addc_u32 s0, s17, 0 +; GFX9-NEXT: s_add_u32 s19, s16, 1 +; GFX9-NEXT: s_addc_u32 s1, s17, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s5, s18 -; GFX9-NEXT: s_cmp_ge_u32 s0, s13 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s5, s5, s18 +; GFX9-NEXT: s_cmp_ge_u32 s5, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1 +; GFX9-NEXT: s_cselect_b32 s18, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 -; GFX9-NEXT: s_cmp_eq_u32 s0, s13 +; GFX9-NEXT: s_cmp_eq_u32 s5, s13 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s17 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; 
GFX9-NEXT: s_xor_b64 s[0:1], s[14:15], s[8:9] ; GFX9-NEXT: s_ashr_i32 s4, s11, 31 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc ; GFX9-NEXT: s_add_u32 s8, s10, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, s17 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: s_mov_b32 s5, s4 ; GFX9-NEXT: s_addc_u32 s9, s11, s4 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, s16 ; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc @@ -9730,31 +9579,31 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v5, s15 ; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_add_u32 s6, s12, 2 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[0:1] -; GFX9-NEXT: s_add_u32 s0, s12, 1 -; GFX9-NEXT: s_addc_u32 s6, s13, 0 -; GFX9-NEXT: s_add_u32 s1, s12, 2 -; GFX9-NEXT: s_addc_u32 s15, s13, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, s0 -; GFX9-NEXT: v_mov_b32_e32 v6, s1 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NEXT: v_mov_b32_e32 v6, s15 +; GFX9-NEXT: s_addc_u32 s0, s13, 0 +; GFX9-NEXT: s_add_u32 s15, s12, 1 +; GFX9-NEXT: s_addc_u32 s1, s13, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s14 -; GFX9-NEXT: s_cmp_ge_u32 s0, s9 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s7, s7, s14 +; GFX9-NEXT: s_cmp_ge_u32 s7, s9 +; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: v_mov_b32_e32 v6, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 +; GFX9-NEXT: s_cselect_b32 s14, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 -; GFX9-NEXT: s_cmp_eq_u32 s0, s9 +; GFX9-NEXT: s_cmp_eq_u32 s7, s9 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v6, s1 +; GFX9-NEXT: v_mov_b32_e32 v5, s14 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc -; GFX9-NEXT: v_mov_b32_e32 v6, s13 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GFX9-NEXT: v_mov_b32_e32 v5, s13 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v5, s12 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc ; GFX9-NEXT: s_xor_b64 s[0:1], s[10:11], s[4:5] @@ -9783,19 +9632,23 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_mov_b32_e32 v0, 0x4f800000 ; GFX6-NEXT: v_madak_f32 v0, 0, v0, 0x4996c7d8 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0xffed2705 -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s4, 0xffed2705 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 ; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s8 -; GFX6-NEXT: v_mul_lo_u32 v4, v0, s8 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_ashr_i32 s8, s3, 31 +; GFX6-NEXT: s_add_u32 s2, s2, s8 
+; GFX6-NEXT: v_mul_lo_u32 v2, v1, s4 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s4 +; GFX6-NEXT: v_mul_lo_u32 v4, v0, s4 +; GFX6-NEXT: s_mov_b32 s9, s8 +; GFX6-NEXT: s_addc_u32 s3, s3, s8 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 ; GFX6-NEXT: v_mul_hi_u32 v3, v0, v4 @@ -9807,6 +9660,8 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GFX6-NEXT: v_mul_lo_u32 v6, v1, v4 ; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] +; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc @@ -9814,10 +9669,11 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GFX6-NEXT: v_mul_lo_u32 v2, v1, s8 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s8 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GFX6-NEXT: v_mul_lo_u32 v3, v0, s8 +; GFX6-NEXT: v_mul_lo_u32 v2, v1, s4 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s4 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_mul_lo_u32 v3, v0, s4 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v0, v2 ; GFX6-NEXT: v_mul_hi_u32 v7, v0, v3 @@ -9832,64 +9688,57 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v7, v5, vcc ; GFX6-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_ashr_i32 s8, s7, 31 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GFX6-NEXT: s_add_u32 s0, s6, s8 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: s_mov_b32 s9, s8 -; GFX6-NEXT: s_addc_u32 s1, s7, s8 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GFX6-NEXT: s_xor_b64 s[6:7], s[0:1], s[8:9] -; GFX6-NEXT: v_mul_lo_u32 v2, s6, v1 -; GFX6-NEXT: v_mul_hi_u32 v3, s6, v0 -; GFX6-NEXT: v_mul_hi_u32 v4, s6, v1 -; GFX6-NEXT: v_mul_hi_u32 v5, s7, v1 -; GFX6-NEXT: v_mul_lo_u32 v1, s7, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, s2, v1 +; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0 +; GFX6-NEXT: v_mul_hi_u32 v4, s2, v1 +; GFX6-NEXT: v_mul_hi_u32 v5, s3, v1 +; GFX6-NEXT: v_mul_lo_u32 v1, s3, v1 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GFX6-NEXT: v_mul_lo_u32 v4, s7, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0 -; GFX6-NEXT: s_mov_b32 s0, s4 -; GFX6-NEXT: s_mov_b32 s4, 0x12d8fb +; GFX6-NEXT: v_mul_lo_u32 v4, s3, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0 +; GFX6-NEXT: s_mov_b32 s4, s0 +; GFX6-NEXT: s_mov_b32 s0, 0x12d8fb ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; GFX6-NEXT: v_mul_lo_u32 v1, v1, s4 -; GFX6-NEXT: v_mul_hi_u32 v2, v0, s4 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, s4 -; GFX6-NEXT: s_mov_b32 s1, s5 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s0 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, s0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s0 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mov_b32_e32 v2, s7 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GFX6-NEXT: v_mov_b32_e32 v2, s3 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v2, 
v1, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s0, v0 ; GFX6-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v1, vcc -; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v2 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s0, v2 ; GFX6-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v3, vcc -; GFX6-NEXT: s_mov_b32 s4, 0x12d8fa -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2 +; GFX6-NEXT: s_mov_b32 s0, 0x12d8fa +; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s0, v2 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX6-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GFX6-NEXT: v_cmp_lt_u32_e64 s[0:1], s0, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[0:1] +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX6-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 -; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX6-NEXT: v_xor_b32_e32 v0, s8, v0 ; GFX6-NEXT: v_xor_b32_e32 v1, s8, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, s8 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s8, v0 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc -; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm ; ; GFX9-LABEL: srem_i64_oddk_denom: @@ -9898,7 +9747,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX9-NEXT: v_mac_f32_e32 v0, 0, v1 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 @@ -9906,115 +9755,115 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: v_readfirstlane_b32 s5, v0 -; GFX9-NEXT: s_mul_hi_u32 s6, s5, 0xffed2705 -; GFX9-NEXT: s_mul_i32 s7, s4, 0xffed2705 -; GFX9-NEXT: s_add_i32 s6, s6, s7 -; GFX9-NEXT: s_sub_i32 s6, s6, s5 -; GFX9-NEXT: s_mul_i32 s9, s5, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s7, s5, s6 -; GFX9-NEXT: s_mul_i32 s8, s5, s6 -; GFX9-NEXT: s_mul_hi_u32 s5, s5, s9 -; GFX9-NEXT: s_add_u32 s5, s5, s8 -; GFX9-NEXT: s_addc_u32 s7, 0, s7 -; GFX9-NEXT: s_mul_hi_u32 s10, s4, s9 -; GFX9-NEXT: s_mul_i32 s9, s4, s9 -; GFX9-NEXT: s_add_u32 s5, s5, s9 -; GFX9-NEXT: s_mul_hi_u32 s8, s4, s6 -; GFX9-NEXT: s_addc_u32 s5, s7, s10 -; GFX9-NEXT: s_addc_u32 s7, s8, 0 -; GFX9-NEXT: s_mul_i32 s6, s4, s6 -; GFX9-NEXT: s_add_u32 s5, s5, s6 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s5, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 +; GFX9-NEXT: s_mul_hi_u32 s2, s1, 0xffed2705 +; GFX9-NEXT: s_mul_i32 s3, s0, 0xffed2705 +; GFX9-NEXT: s_add_i32 s2, s2, s3 +; GFX9-NEXT: s_sub_i32 s2, s2, s1 +; GFX9-NEXT: s_mul_i32 s9, s1, 0xffed2705 
+; GFX9-NEXT: s_mul_hi_u32 s3, s1, s2 +; GFX9-NEXT: s_mul_i32 s8, s1, s2 +; GFX9-NEXT: s_mul_hi_u32 s1, s1, s9 +; GFX9-NEXT: s_add_u32 s1, s1, s8 +; GFX9-NEXT: s_addc_u32 s3, 0, s3 +; GFX9-NEXT: s_mul_hi_u32 s10, s0, s9 +; GFX9-NEXT: s_mul_i32 s9, s0, s9 +; GFX9-NEXT: s_add_u32 s1, s1, s9 +; GFX9-NEXT: s_mul_hi_u32 s8, s0, s2 +; GFX9-NEXT: s_addc_u32 s1, s3, s10 +; GFX9-NEXT: s_addc_u32 s3, s8, 0 +; GFX9-NEXT: s_mul_i32 s2, s0, s2 +; GFX9-NEXT: s_add_u32 s1, s1, s2 +; GFX9-NEXT: s_addc_u32 s2, 0, s3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s1, v0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s4, s4, s6 -; GFX9-NEXT: v_readfirstlane_b32 s6, v0 -; GFX9-NEXT: s_mul_i32 s5, s4, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s7, s6, 0xffed2705 -; GFX9-NEXT: s_add_i32 s7, s7, s5 -; GFX9-NEXT: s_sub_i32 s5, s7, s6 -; GFX9-NEXT: s_mul_i32 s8, s6, 0xffed2705 -; GFX9-NEXT: s_mul_hi_u32 s11, s6, s5 -; GFX9-NEXT: s_mul_i32 s12, s6, s5 -; GFX9-NEXT: s_mul_hi_u32 s6, s6, s8 -; GFX9-NEXT: s_add_u32 s6, s6, s12 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s8 -; GFX9-NEXT: s_mul_i32 s10, s4, s8 +; GFX9-NEXT: s_addc_u32 s0, s0, s2 +; GFX9-NEXT: v_readfirstlane_b32 s2, v0 +; GFX9-NEXT: s_mul_i32 s1, s0, 0xffed2705 +; GFX9-NEXT: s_mul_hi_u32 s3, s2, 0xffed2705 +; GFX9-NEXT: s_add_i32 s3, s3, s1 +; GFX9-NEXT: s_sub_i32 s1, s3, s2 +; GFX9-NEXT: s_mul_i32 s8, s2, 0xffed2705 +; GFX9-NEXT: s_mul_hi_u32 s11, s2, s1 +; GFX9-NEXT: s_mul_i32 s12, s2, s1 +; GFX9-NEXT: s_mul_hi_u32 s2, s2, s8 +; GFX9-NEXT: s_add_u32 s2, s2, s12 +; GFX9-NEXT: s_mul_hi_u32 s9, s0, s8 +; GFX9-NEXT: s_mul_i32 s10, s0, s8 ; GFX9-NEXT: s_addc_u32 s8, 0, s11 -; GFX9-NEXT: s_add_u32 s6, s6, s10 -; GFX9-NEXT: s_mul_hi_u32 s7, s4, s5 -; GFX9-NEXT: s_addc_u32 s6, s8, s9 -; GFX9-NEXT: s_addc_u32 s7, s7, 0 -; GFX9-NEXT: s_mul_i32 s5, s4, s5 -; GFX9-NEXT: s_add_u32 s5, s6, s5 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s5, v0 +; GFX9-NEXT: s_add_u32 s2, s2, s10 +; GFX9-NEXT: s_mul_hi_u32 s3, s0, s1 +; GFX9-NEXT: s_addc_u32 s2, s8, s9 +; GFX9-NEXT: s_addc_u32 s3, s3, 0 +; GFX9-NEXT: s_mul_i32 s1, s0, s1 +; GFX9-NEXT: s_add_u32 s1, s2, s1 +; GFX9-NEXT: s_addc_u32 s2, 0, s3 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s1, v0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_addc_u32 s6, s4, s6 +; GFX9-NEXT: s_addc_u32 s8, s0, s2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i32 s4, s3, 31 -; GFX9-NEXT: s_add_u32 s2, s2, s4 -; GFX9-NEXT: s_mov_b32 s5, s4 -; GFX9-NEXT: s_addc_u32 s3, s3, s4 -; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s7, s2, s6 -; GFX9-NEXT: s_mul_hi_u32 s9, s2, s8 -; GFX9-NEXT: s_mul_hi_u32 s5, s2, s6 -; GFX9-NEXT: s_add_u32 s7, s9, s7 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 -; GFX9-NEXT: s_mul_hi_u32 s10, s3, s8 -; GFX9-NEXT: s_mul_i32 s8, s3, s8 -; GFX9-NEXT: s_add_u32 s7, s7, s8 -; GFX9-NEXT: s_mul_hi_u32 s9, s3, s6 -; GFX9-NEXT: s_addc_u32 s5, s5, s10 -; GFX9-NEXT: s_addc_u32 s7, s9, 0 -; GFX9-NEXT: s_mul_i32 s6, s3, s6 -; GFX9-NEXT: s_add_u32 s5, s5, s6 -; GFX9-NEXT: s_addc_u32 s6, 0, s7 -; GFX9-NEXT: s_mul_hi_u32 s8, s5, 0x12d8fb -; GFX9-NEXT: s_mul_i32 s5, s5, 0x12d8fb +; GFX9-NEXT: s_ashr_i32 s2, s7, 31 +; GFX9-NEXT: s_add_u32 s0, s6, s2 +; GFX9-NEXT: s_mov_b32 s3, s2 +; GFX9-NEXT: s_addc_u32 s1, s7, s2 +; GFX9-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: v_readfirstlane_b32 s7, v0 +; GFX9-NEXT: s_mul_i32 s6, s0, s8 +; GFX9-NEXT: s_mul_hi_u32 s9, s0, s7 +; GFX9-NEXT: s_mul_hi_u32 s3, s0, s8 +; GFX9-NEXT: s_add_u32 s6, s9, s6 +; 
GFX9-NEXT: s_addc_u32 s3, 0, s3 +; GFX9-NEXT: s_mul_hi_u32 s10, s1, s7 +; GFX9-NEXT: s_mul_i32 s7, s1, s7 +; GFX9-NEXT: s_add_u32 s6, s6, s7 +; GFX9-NEXT: s_mul_hi_u32 s9, s1, s8 +; GFX9-NEXT: s_addc_u32 s3, s3, s10 +; GFX9-NEXT: s_addc_u32 s6, s9, 0 +; GFX9-NEXT: s_mul_i32 s7, s1, s8 +; GFX9-NEXT: s_add_u32 s3, s3, s7 +; GFX9-NEXT: s_addc_u32 s6, 0, s6 +; GFX9-NEXT: s_mul_hi_u32 s8, s3, 0x12d8fb +; GFX9-NEXT: s_mul_i32 s3, s3, 0x12d8fb ; GFX9-NEXT: s_mul_i32 s6, s6, 0x12d8fb -; GFX9-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-NEXT: s_add_i32 s8, s8, s6 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s2, v0 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s0, v0 ; GFX9-NEXT: s_mov_b32 s7, 0x12d8fb ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s2, s3, s8 +; GFX9-NEXT: s_subb_u32 s3, s1, s8 ; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s7, v0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s3, s2, 0 +; GFX9-NEXT: s_subb_u32 s0, s3, 0 ; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s7, v1 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: s_subb_u32 s5, s3, 0 +; GFX9-NEXT: s_subb_u32 s1, s0, 0 ; GFX9-NEXT: s_mov_b32 s6, 0x12d8fa ; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1 -; GFX9-NEXT: s_cmp_eq_u32 s3, 0 +; GFX9-NEXT: s_cmp_eq_u32 s0, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s3 -; GFX9-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-NEXT: v_mov_b32_e32 v5, s0 +; GFX9-NEXT: v_mov_b32_e32 v6, s1 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], s6, v0 +; GFX9-NEXT: s_cmp_eq_u32 s3, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 -; GFX9-NEXT: s_cmp_eq_u32 s2, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_mov_b32_e32 v5, s2 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s4, v3 -; GFX9-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] +; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s2, v4 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s2, v0 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_endpgm %r = srem i64 %x, 1235195 store i64 %r, i64 addrspace(1)* %out @@ -10133,7 +9982,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mul_hi_u32 v3, s4, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s5, v0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_mul_lo_u32 v3, s4, v0 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v0, v2 @@ -10171,8 +10020,8 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; 
GFX6-NEXT: v_mul_hi_u32 v2, s8, v0 ; GFX6-NEXT: v_mul_lo_u32 v3, s9, v0 ; GFX6-NEXT: v_mul_lo_u32 v0, s8, v0 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s13, v1 ; GFX6-NEXT: v_mov_b32_e32 v3, s9 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s12, v0 @@ -10189,19 +10038,19 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GFX6-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GFX6-NEXT: v_mov_b32_e32 v4, s13 -; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 ; GFX6-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; GFX6-NEXT: v_mov_b32_e32 v5, s13 +; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_xor_b32_e32 v1, s10, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, s10 @@ -10331,26 +10180,26 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX9-NEXT: s_subb_u32 s2, s6, s9 ; GFX9-NEXT: v_subrev_co_u32_e64 v4, s[0:1], s8, v2 ; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX9-NEXT: s_subb_u32 s2, s2, 0 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v3, s12 -; GFX9-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NEXT: s_subb_u32 s0, s2, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s11 -; GFX9-NEXT: s_cmp_ge_u32 s0, s9 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s2, s7, s11 +; GFX9-NEXT: s_cmp_ge_u32 s2, s9 +; GFX9-NEXT: v_mov_b32_e32 v5, s12 +; GFX9-NEXT: v_mov_b32_e32 v6, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 +; GFX9-NEXT: s_cselect_b32 s3, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 -; GFX9-NEXT: s_cmp_eq_u32 s0, s9 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: s_cmp_eq_u32 s2, s9 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX9-NEXT: v_mov_b32_e32 v6, s3 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GFX9-NEXT: v_mov_b32_e32 v5, s0 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v6, s2 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc ; GFX9-NEXT: v_xor_b32_e32 v1, s10, v1 ; GFX9-NEXT: v_xor_b32_e32 v2, s10, v3 ; 
GFX9-NEXT: v_mov_b32_e32 v3, s10 @@ -10505,7 +10354,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mul_lo_u32 v2, s2, v1 ; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0 ; GFX6-NEXT: v_mul_lo_u32 v4, s3, v0 -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_mul_lo_u32 v3, s2, v0 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, v0, v2 @@ -10542,8 +10391,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mul_hi_u32 v2, s16, v0 ; GFX6-NEXT: v_mul_lo_u32 v3, s17, v0 ; GFX6-NEXT: v_mul_lo_u32 v0, s16, v0 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s5, v1 ; GFX6-NEXT: v_mov_b32_e32 v3, s17 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 @@ -10559,49 +10408,49 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_subrev_i32_e64 v3, s[0:1], s16, v4 ; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GFX6-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] +; GFX6-NEXT: s_ashr_i32 s2, s15, 31 ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] +; GFX6-NEXT: s_add_u32 s4, s14, s2 ; GFX6-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] -; GFX6-NEXT: s_ashr_i32 s0, s15, 31 -; GFX6-NEXT: s_add_u32 s2, s14, s0 -; GFX6-NEXT: s_mov_b32 s1, s0 -; GFX6-NEXT: s_addc_u32 s3, s15, s0 -; GFX6-NEXT: v_mov_b32_e32 v4, s5 -; GFX6-NEXT: s_xor_b64 s[4:5], s[2:3], s[0:1] -; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GFX6-NEXT: v_cvt_f32_u32_e32 v4, s4 -; GFX6-NEXT: v_cvt_f32_u32_e32 v5, s5 +; GFX6-NEXT: v_mov_b32_e32 v5, s5 +; GFX6-NEXT: s_mov_b32 s3, s2 +; GFX6-NEXT: s_addc_u32 s5, s15, s2 +; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3] +; GFX6-NEXT: v_cvt_f32_u32_e32 v6, s4 +; GFX6-NEXT: v_cvt_f32_u32_e32 v7, s5 +; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s17, v1 -; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GFX6-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s16, v0 -; GFX6-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; GFX6-NEXT: v_rcp_f32_e32 v4, v4 -; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX6-NEXT: v_rcp_f32_e32 v6, v6 +; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s17, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v4 -; GFX6-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GFX6-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v6 +; GFX6-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 ; GFX6-NEXT: v_trunc_f32_e32 v4, v4 -; GFX6-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GFX6-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GFX6-NEXT: s_sub_u32 s0, 0, s4 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_mul_hi_u32 v3, s0, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_mul_hi_u32 v2, s0, v3 ; GFX6-NEXT: v_mul_lo_u32 v5, s0, v4 ; GFX6-NEXT: s_subb_u32 s1, 0, s5 -; GFX6-NEXT: v_mul_lo_u32 v6, s1, v2 +; 
GFX6-NEXT: v_mul_lo_u32 v6, s1, v3 ; GFX6-NEXT: s_ashr_i32 s14, s7, 31 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX6-NEXT: v_mul_lo_u32 v5, s0, v2 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GFX6-NEXT: v_mul_lo_u32 v6, v2, v3 -; GFX6-NEXT: v_mul_hi_u32 v7, v2, v5 -; GFX6-NEXT: v_mul_hi_u32 v8, v2, v3 -; GFX6-NEXT: v_mul_hi_u32 v9, v4, v3 -; GFX6-NEXT: v_mul_lo_u32 v3, v4, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s0, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; GFX6-NEXT: v_mul_lo_u32 v6, v3, v2 +; GFX6-NEXT: v_mul_hi_u32 v7, v3, v5 +; GFX6-NEXT: v_mul_hi_u32 v8, v3, v2 +; GFX6-NEXT: v_mul_hi_u32 v9, v4, v2 +; GFX6-NEXT: v_mul_lo_u32 v2, v4, v2 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GFX6-NEXT: v_mul_lo_u32 v8, v4, v5 @@ -10611,15 +10460,15 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v5, v2 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v4, v5, vcc ; GFX6-NEXT: v_mul_lo_u32 v4, s0, v3 ; GFX6-NEXT: v_mul_hi_u32 v5, s0, v2 ; GFX6-NEXT: v_mul_lo_u32 v6, s1, v2 ; GFX6-NEXT: v_xor_b32_e32 v1, s12, v1 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; GFX6-NEXT: v_mul_lo_u32 v5, s0, v2 ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; GFX6-NEXT: v_mul_lo_u32 v8, v2, v4 @@ -10662,8 +10511,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0 ; GFX6-NEXT: v_mul_lo_u32 v2, s4, v2 ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s7, v3 ; GFX6-NEXT: v_mov_b32_e32 v5, s5 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s6, v2 @@ -10680,19 +10529,19 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[2:3] ; GFX6-NEXT: v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1] ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 -; GFX6-NEXT: v_cndmask_b32_e64 v5, v6, v5, s[0:1] -; GFX6-NEXT: v_mov_b32_e32 v6, s7 -; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 -; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[0:1] +; GFX6-NEXT: v_mov_b32_e32 v7, s7 +; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v7, v3, vcc +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 ; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s5, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: v_xor_b32_e32 v2, s14, v2 ; GFX6-NEXT: 
v_xor_b32_e32 v3, s14, v3 ; GFX6-NEXT: v_mov_b32_e32 v4, s14 @@ -10823,33 +10672,33 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_subb_u32 s2, s4, s13 ; GFX9-NEXT: v_subrev_co_u32_e64 v3, s[0:1], s12, v1 ; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX9-NEXT: s_subb_u32 s2, s2, 0 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s16 -; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: s_subb_u32 s0, s2, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s5, s15 -; GFX9-NEXT: s_cmp_ge_u32 s0, s13 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s2, s5, s15 +; GFX9-NEXT: s_cmp_ge_u32 s2, s13 +; GFX9-NEXT: v_mov_b32_e32 v5, s16 +; GFX9-NEXT: v_mov_b32_e32 v6, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 +; GFX9-NEXT: s_cselect_b32 s3, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 -; GFX9-NEXT: s_cmp_eq_u32 s0, s13 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s1 +; GFX9-NEXT: s_cmp_eq_u32 s2, s13 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GFX9-NEXT: v_mov_b32_e32 v6, s3 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; GFX9-NEXT: v_mov_b32_e32 v5, s0 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX9-NEXT: s_ashr_i32 s0, s11, 31 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v6, s2 ; GFX9-NEXT: s_add_u32 s2, s10, s0 ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: s_addc_u32 s3, s11, s0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GFX9-NEXT: s_xor_b64 s[4:5], s[2:3], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s4 ; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, s14, v0 ; GFX9-NEXT: v_xor_b32_e32 v2, s14, v2 ; GFX9-NEXT: v_mac_f32_e32 v1, 0x4f800000, v3 @@ -10960,26 +10809,26 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_subb_u32 s2, s6, s5 ; GFX9-NEXT: v_subrev_co_u32_e64 v6, s[0:1], s4, v3 ; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX9-NEXT: s_subb_u32 s2, s2, 0 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] -; GFX9-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NEXT: s_subb_u32 s0, s2, 0 ; GFX9-NEXT: s_cmp_lg_u64 vcc, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[0:1] -; GFX9-NEXT: s_subb_u32 s0, s7, s11 -; GFX9-NEXT: s_cmp_ge_u32 s0, s5 -; GFX9-NEXT: s_cselect_b32 s1, -1, 0 +; GFX9-NEXT: s_subb_u32 s2, s7, s11 +; GFX9-NEXT: s_cmp_ge_u32 s2, s5 +; GFX9-NEXT: v_mov_b32_e32 v7, s12 +; GFX9-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 +; GFX9-NEXT: s_cselect_b32 s3, -1, 0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 -; GFX9-NEXT: s_cmp_eq_u32 s0, s5 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_mov_b32_e32 v7, s1 +; GFX9-NEXT: s_cmp_eq_u32 s2, s5 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v8, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; GFX9-NEXT: v_mov_b32_e32 v8, s3 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, s0 +; GFX9-NEXT: v_cndmask_b32_e32 v7, 
v8, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v8, s2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 ; GFX9-NEXT: v_xor_b32_e32 v3, s10, v5 ; GFX9-NEXT: v_mov_b32_e32 v5, s10 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll index 7a812cef9f9e6..59be0cfb22ec3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll @@ -119,14 +119,15 @@ define void @no_vgprs_to_spill_into() #1 { ; CHECK-LABEL: callee_need_to_spill_fp_exec_to_memory: ; CHECK: %bb.0: -; WAVE32: s_or_saveexec_b32 [[EXEC_COPY:s[0-9]+]], -1 -; WAVE32-NEXT: buffer_store_dword [[RES_VGPR:v[0-9]+]], off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; WAVE32: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; WAVE32: s_xor_saveexec_b32 [[EXEC_COPY:s[0-9]+]], -1 +; WAVE32-NEXT: buffer_store_dword [[RES_VGPR:v[0-9]+]], off, s[0:3], s33 offset:192 ; 4-byte Folded Spill ; WAVE32: s_mov_b32 exec_lo, [[EXEC_COPY]] -; WAVE32-NEXT: v_mov_b32_e32 [[TEMP_VGPR:v[0-9]+]], exec_lo -; WAVE32-NEXT: buffer_store_dword [[TEMP_VGPR]], off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: v_mov_b32_e32 [[TEMP_VGPR:v[0-9]+]], [[FP_SCRATCH_COPY]] +; WAVE32-NEXT: buffer_store_dword [[TEMP_VGPR]], off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32: v_mov_b32_e32 [[TEMP_VGPR]], exec_lo +; WAVE32-NEXT: buffer_store_dword [[TEMP_VGPR]], off, s[0:3], s33 offset:196 ; 4-byte Folded Spill ; WAVE32-NEXT: .cfi_offset 1, 6272 -; WAVE32-NEXT: v_mov_b32_e32 [[TEMP_VGPR]], s33 -; WAVE32-NEXT: buffer_store_dword [[TEMP_VGPR]], off, s[0:3], s32 offset:200 ; 4-byte Folded Spill ; WAVE32: buffer_store_dword v40, off, s[0:3], s33 offset ; WAVE32-COUNT-47: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 ; WAVE32: v_writelane_b32 [[RES_VGPR]], s34, 0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 1dad78ab9b78a..7e86d906f0a3d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -1,16 +1,16 @@ -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck 
-enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -show-mc-encoding -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa 
-data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s -; RUN: llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -march=r600 -mcpu=cypress | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC ; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4 ; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] undef, align 16 @@ -52,14 +52,14 @@ ; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() ; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)* ; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 1 -; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !0 +; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !1 ; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 2 -; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !1, !invariant.load !0 +; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !2, !invariant.load !1 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 -; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2 -; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !2 -; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !2 +; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !3 +; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !3 +; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !3 ; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]] ; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]] @@ -74,11 +74,11 @@ ; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 1 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1 -; NOHSAOPT: 
call i32 @llvm.amdgcn.workitem.id.y(), !range !1 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1 +; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !1 +; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !1 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !2 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !2 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !2 define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -555,9 +555,12 @@ entry: attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" } attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" } -; HSAOPT: !0 = !{} -; HSAOPT: !1 = !{i32 0, i32 257} -; HSAOPT: !2 = !{i32 0, i32 256} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} -; NOHSAOPT: !0 = !{i32 0, i32 257} -; NOHSAOPT: !1 = !{i32 0, i32 256} +; HSAOPT: !1 = !{} +; HSAOPT: !2 = !{i32 0, i32 257} +; HSAOPT: !3 = !{i32 0, i32 256} + +; NOHSAOPT: !1 = !{i32 0, i32 257} +; NOHSAOPT: !2 = !{i32 0, i32 256} diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 7ef295cf5e995..3e3c81d6f4ca3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -172,7 +172,7 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x25{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: @@ -200,7 +200,7 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: simple_lds_recurse: ; GCN-NEXT: .lds_size: 0x100{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x29{{$}} ; GCN-NEXT: simple_stack: @@ -210,7 +210,7 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x25{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: @@ -227,7 +227,7 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_stack_recurse: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2a{{$}} ; GCN-NEXT: ... 
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index b05054d8a03d5..e9e8e01a91782 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -458,7 +458,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR8]] { +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -477,7 +477,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -496,7 +496,7 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %p ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR14:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() @@ -515,7 +515,7 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -593,7 +593,7 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -611,7 +611,7 @@ define void @use_implicitarg_ptr() #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -628,7 +628,7 @@ define void @func_indirect_use_implicitarg_ptr() #1 { ; AKF_HSA-NEXT: ret void ; ; 
ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -751,7 +751,6 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -944,13 +943,13 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" 
"amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes 
#[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 61ba99bc16f7d..c8618cbbc7097 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -431,7 +431,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] { +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -449,7 +449,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* % ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast -; ATTRIBUTOR_HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] { +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32* ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -541,7 +541,7 @@ define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_shared -; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] { +; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]]) ; ATTRIBUTOR_HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32 ; ATTRIBUTOR_HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4 @@ -562,7 +562,7 @@ define amdgpu_kernel void @use_is_private(i8* %ptr) #1 { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_private -; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] { +; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR12]] { ; ATTRIBUTOR_HSA-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]]) ; ATTRIBUTOR_HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32 ; ATTRIBUTOR_HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4 @@ -657,5 +657,6 @@ attributes #1 = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll index 670644e38d4f3..d16e635ee2a64 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 @@ -129,6 +129,9 @@ define amdgpu_kernel void @min_1024_max_1024() #3 { } attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + ; CHECK: amdhsa.kernels: ; CHECK: .max_flat_workgroup_size: 64 ; CHECK: .name: min_64_max_64 diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll index 48945eaf3fd21..205b09da28426 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 @@ -129,6 +129,9 @@ define amdgpu_kernel void @min_1024_max_1024() #3 { } attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + ; HSAMD: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; HSAMD: Version: [ 1, 0 ] ; HSAMD: Kernels: diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index 40463858d7ba6..4f13403ca4439 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -457,8 +457,10 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xa8, 0x14, 0x0e, 0x90, 0xa8, 0x14, 0x16, 0xe4, 0x80, 0x18, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, 
implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40_lo16, 4352 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xa9, 0x14, 0x0e, 0x90, 0xa9, 0x14, 0x16, 0xe4, 0x80, 0x16, 0xe6, 0x11, 0x94, 0x08, 0xec, 0x20, 0x40 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) @@ -505,17 +507,17 @@ body: | ; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5) ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0 - ; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1 + 4, addrspace 5) + ; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load 
(s32) from %stack.1 + 4, addrspace 5) ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) - ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5) - ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2 + 4, addrspace 5) + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2 + 4, addrspace 5) ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -536,7 +538,9 @@ body: | ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30, $sgpr31, $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll deleted file mode 100644 index 337dcfc652bd0..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll +++ /dev/null @@ -1,97 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s - -; Subtargets must wait for 
outstanding memory instructions before a barrier if -; they cannot back off of the barrier. - -define void @back_off_barrier_no_fence(i32* %in, i32* %out) #0 { -; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence: -; GFX9-NO-BACKOFF: ; %bb.0: -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: s_barrier -; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence: -; GFX9-BACKOFF: ; %bb.0: -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX9-BACKOFF-NEXT: s_barrier -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence: -; GFX10-BACKOFF: ; %bb.0: -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX10-BACKOFF-NEXT: s_barrier -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32* %in - call void @llvm.amdgcn.s.barrier() - store i32 %load, i32* %out - ret void -} - -define void @back_off_barrier_with_fence(i32* %in, i32* %out) #0 { -; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence: -; GFX9-NO-BACKOFF: ; %bb.0: -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: s_barrier -; GFX9-NO-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence: -; GFX9-BACKOFF: ; %bb.0: -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX9-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-BACKOFF-NEXT: s_barrier -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence: -; GFX10-BACKOFF: ; %bb.0: -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: s_barrier -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: buffer_gl0_inv -; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 -; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32* %in - fence syncscope("workgroup") release - call void @llvm.amdgcn.s.barrier() - fence syncscope("workgroup") acquire - store i32 %load, i32* %out 
- ret void -} - -declare void @llvm.amdgcn.s.barrier() - -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir index 69cfbeeb9a491..83649f1c8cec9 100644 --- a/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir +++ b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir @@ -21,10 +21,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY killed $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[V_CMP_NE_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $exec_lo, implicit-def $exec_lo + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PRED_COPY1]], killed [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[PRED_COPY1]], implicit-def dead $scc ; CHECK-NEXT: $exec_lo = S_MOV_B32_term killed [[S_AND_B32_]] ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 @@ -34,8 +34,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %15, 0, implicit $exec - ; CHECK-NEXT: %7:vgpr_32, dead %8:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %7, %subreg.sub1 + ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed [[V_ADDC_U32_e64_]], %subreg.sub1 ; CHECK-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8), addrspace 1) @@ -55,14 +55,14 @@ body: | ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec - ; CHECK-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; CHECK-NEXT: dead [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: 
%bb.1(0x40000000), %bb.7(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 killed [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]] ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir new file mode 100644 index 0000000000000..4a2bbc8637355 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir @@ -0,0 +1,180 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s + +--- +name: branch_no_terminators +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '$sgpr12' } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' +body: | + ; CHECK-LABEL: name: branch_no_terminators + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.5(0x30000000) + ; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0 + ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr12 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0 + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1 + ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.1, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, 
$sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol + ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags() , implicit-def $scc + ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags() , implicit-def $scc, implicit $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1 + ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr82 + ; CHECK-NEXT: $sgpr82 = S_MOV_B32 killed $sgpr83 + ; CHECK-NEXT: $sgpr83 = S_MOV_B32 killed $sgpr84 + ; CHECK-NEXT: $sgpr84 = S_MOV_B32 killed $sgpr85 + ; CHECK-NEXT: $sgpr101 = S_MOV_B32 killed $vcc_lo + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, 
$sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1 + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4 + ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol + ; CHECK-NEXT: $sgpr0 = 
S_ADD_U32 $sgpr0, target-flags() , implicit-def $scc + ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags() , implicit-def $scc, implicit $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5 + ; CHECK-NEXT: $sgpr5 = S_MOV_B32 killed $sgpr6 + ; CHECK-NEXT: $sgpr6 = S_MOV_B32 killed $sgpr7 + ; CHECK-NEXT: $sgpr7 = S_MOV_B32 killed $sgpr8 + ; CHECK-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr9 + ; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10 + ; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11 + ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1 + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, 
$sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6 + ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: S_WAITCNT 3952 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.4(0x30000000) + liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + + S_WAITCNT 0 + $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec + $exec = S_MOV_B64 killed $sgpr4_sgpr5 + $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0 + $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0 + $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0 + $sgpr81 = S_MOV_B32 killed $sgpr12 + $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0 + $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1 + S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + $vgpr1 = V_WRITELANE_B32 killed 
$sgpr81, 7, $vgpr1 + $sgpr81 = S_MOV_B32 killed $sgpr82 + $sgpr82 = S_MOV_B32 killed $sgpr83 + $sgpr83 = S_MOV_B32 killed $sgpr84 + $sgpr84 = S_MOV_B32 killed $sgpr85 + $sgpr101 = S_MOV_B32 killed $vcc_lo + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4 + S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.2, implicit killed $scc + + bb.3: + liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + $sgpr4 = S_MOV_B32 killed $sgpr5 + $sgpr5 = S_MOV_B32 killed $sgpr6 + $sgpr6 = S_MOV_B32 killed $sgpr7 + $sgpr7 = S_MOV_B32 killed $sgpr8 + $sgpr8 = S_MOV_B32 killed $sgpr9 + $sgpr9 = S_MOV_B32 killed $sgpr10 + $sgpr10 = S_MOV_B32 killed $sgpr11 + S_SETPC_B64 $sgpr4_sgpr5 + + bb.4: + liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, 
$vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + $sgpr101 = V_READLANE_B32 $vgpr1, 6 + $sgpr100 = V_READLANE_B32 $vgpr1, 5 + $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec + $exec = S_MOV_B64 killed $sgpr4_sgpr5 + S_WAITCNT 3952 + S_SETPC_B64_return undef $sgpr30_sgpr31 + +... diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index a272b7243f5d4..1f1b0539eaa73 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -898,7 +898,7 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-LABEL: spill_func: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] @@ -1701,7 +1701,7 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s35, v0, 2 ; CHECK-NEXT: v_readlane_b32 s34, v0, 1 ; CHECK-NEXT: v_readlane_b32 s33, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll index 71b917bd76926..759f3e91472d5 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll @@ -9,27 +9,27 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -40,29 +40,29 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: 
[[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -73,29 +73,29 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -106,33 +106,33 @@ define amdgpu_ps void 
@buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908_GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, 
implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll index e6d9b4c22804b..ba2d5f5695d2b 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll @@ -8,29 +8,29 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = 
BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret float %ret @@ -41,31 +41,31 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY 
[[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret float %ret @@ -76,31 +76,31 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, 
$sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = 
PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret float %ret @@ -111,35 +111,35 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; 
GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX11-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX11-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX11-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll index 3c94c951d8339..7b054ad1870ed 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll @@ -7,17 +7,17 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i3 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[PRED_COPY7]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -28,18 +28,18 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY 
$sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -50,18 +50,18 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[PRED_COPY8]], 
[[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -72,20 +72,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i3 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[PRED_COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -96,21 +96,21 @@ define 
amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY8]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY9]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY8]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY9]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret @@ -121,22 +121,22 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 
; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY10]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret @@ -147,22 +147,22 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} 
- ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY7]], %subreg.sub0, [[PRED_COPY6]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[PRED_COPY8]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY9]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY10]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret @@ -173,24 +173,24 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; 
GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY10]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY11]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY8]], %subreg.sub0, [[PRED_COPY7]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[PRED_COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY10]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY11]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, 
$sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll index 92258eb682487..0417138dc8a59 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -8,27 +8,27 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -39,29 +39,29 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -72,29 +72,29 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: 
[[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -105,33 +105,33 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX908-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX908-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll index e4028583cf234..df2a516d8dbff 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll @@ -7,15 +7,15 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY4]], %subreg.sub0, [[PRED_COPY3]], %subreg.sub1, [[PRED_COPY2]], %subreg.sub2, [[PRED_COPY1]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[PRED_COPY5]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -26,16 +26,16 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY 
$sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret @@ -46,16 +46,16 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, [[PRED_COPY4]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[PRED_COPY6]], [[PRED_COPY1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret @@ -66,18 +66,18 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val ; GFX90A_GFX940: bb.0 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[PRED_COPY6]], %subreg.sub0, [[PRED_COPY5]], %subreg.sub1, [[PRED_COPY4]], %subreg.sub2, [[PRED_COPY3]], %subreg.sub3 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[PRED_COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[PRED_COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll index d75d8b4300a6a..e56648537670d 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -6,191 +6,191 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe ; GCN: bb.0.bb.0: ; GCN-NEXT: liveins: $sgpr0, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 ; GCN-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 16, align 1, addrspace 7) ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into 
unknown-address + 32, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 48, align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed 
[[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 64, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 80, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 96, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), 
align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 96, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 112, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from 
unknown-address + 128, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 128, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY2]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 128, align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY2]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY3]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 144, align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY3]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY4]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 160, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: 
BUFFER_ATOMIC_ADD_OFFSET [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY4]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY5]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 176, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY5]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed 
[[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 192, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY6]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed 
[[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 208, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY7]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY8]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY8]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY9]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY9]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY10]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY11]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN 
[[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY10]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 224, align 1, addrspace 7) + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY12:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY11]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY12]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY13]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY13:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY13]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY14]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY14:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY14]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: 
[[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY15]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY16]], [[S_LOAD_DWORDX4_IMM]], [[COPY17]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY15:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY15]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from unknown-address + 240, align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY16:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY17:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY16]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY17]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY18]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY18:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY18]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY22:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY21]], [[S_LOAD_DWORDX4_IMM]], [[COPY22]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY20:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 256, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[PRED_COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY21:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY22:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY21]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY22]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[PRED_COPY]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY23]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY23:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY23]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile 
dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY24]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY24:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY24]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[COPY27]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY25:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY25]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on unknown-address + 272, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY26:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY27:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY26]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY27]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed 
[[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY28:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[PRED_COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY29:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[PRED_COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[COPY32]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY30:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[PRED_COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 288, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY31:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY32:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[PRED_COPY31]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY32]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: 
BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY33:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[PRED_COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY34:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[PRED_COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) ; GCN-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY35]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN-NEXT: [[COPY37:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[COPY37]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[PRED_COPY35:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[PRED_COPY35]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into unknown-address + 304, align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY36:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[PRED_COPY37:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[PRED_COPY36]], [[S_LOAD_DWORDX4_IMM]], [[PRED_COPY37]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[PRED_COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; GCN-NEXT: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0 diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll index 996d18c236c83..1212e21b839ab 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll @@ -8,12 +8,12 @@ @alias = hidden alias void (), void ()* @aliasee_default ; ALL-LABEL: {{^}}kernel: -; GFX908: .amdhsa_next_free_vgpr 41 +; GFX908: .amdhsa_next_free_vgpr 32 ; GFX908-NEXT: .amdhsa_next_free_sgpr 33 -; GFX90A: .amdhsa_next_free_vgpr 71 +; GFX90A: .amdhsa_next_free_vgpr 59 ; GFX90A-NEXT: .amdhsa_next_free_sgpr 33 -; GFX90A-NEXT: .amdhsa_accum_offset 44 +; GFX90A-NEXT: .amdhsa_accum_offset 32 define amdgpu_kernel void @kernel() #0 { bb: call void @alias() #2 diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll index 729e6dfc1cb7e..cfbef98db3ab8 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll @@ -9,7 +9,7 @@ ; The parent kernel has a higher VGPR usage than the possible callees. 
; CHECK-LABEL: {{^}}kernel1: -; CHECK: .amdhsa_next_free_vgpr 42 +; CHECK: .amdhsa_next_free_vgpr 41 ; CHECK-NEXT: .amdhsa_next_free_sgpr 33 define amdgpu_kernel void @kernel1() #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 27223ffa9be04..120a801e1edb9 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-V5 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. 
@@ -14,13 +14,15 @@ define void @use_vcc() #1 { } ; GCN-LABEL: {{^}}indirect_use_vcc: -; GCN: v_writelane_b32 v40, s33, 2 +; GCN: s_mov_b32 s4, s33 +; GCN: v_writelane_b32 v40, s4, 2 ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 ; GCN: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s31, v40, 1 -; GCN: v_readlane_b32 s33, v40, 2 +; GCN: v_readlane_b32 s4, v40, 2 +; GCN: s_mov_b32 s33, s4 ; GCN: s_setpc_b64 s[30:31] ; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 41 @@ -283,3 +285,6 @@ entry: attributes #0 = { nounwind noinline norecurse } attributes #1 = { nounwind noinline norecurse } attributes #2 = { nounwind noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index d904124795013..3d3809e31b32b 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -22,9 +22,10 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ } ; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 ; MUBUF: buffer_store_dword ; FLATSCR: scratch_store_dword -; GCN: v_writelane_b32 v40, s33, 4 +; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4 ; GCN: v_writelane_b32 v40, s34, 0 ; GCN: v_writelane_b32 v40, s35, 1 ; GCN: v_writelane_b32 v40, s30, 2 @@ -39,9 +40,10 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ ; GCN: v_readlane_b32 s35, v40, 1 ; GCN: v_readlane_b32 s34, v40, 0 -; GCN: v_readlane_b32 s33, v40, 4 +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4 ; MUBUF: buffer_load_dword ; FLATSCR: scratch_load_dword +; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { call void @external_void_func_void() @@ -51,19 +53,21 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa } ; GCN-LABEL: {{^}}test_func_call_external_void_funcx2: +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN: s_mov_b32 s33, s32 ; MUBUF: buffer_store_dword v40 ; FLATSCR: scratch_store_dword off, v40 -; GCN: v_writelane_b32 v40, s33, 4 +; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4 -; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 ; GCN: s_swappc_b64 ; GCN-NEXT: s_swappc_b64 -; GCN: v_readlane_b32 s33, v40, 4 +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4 ; MUBUF: buffer_load_dword v40 ; FLATSCR: scratch_load_dword v40 +; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]] define void @test_func_call_external_void_funcx2() #0 { call void @external_void_func_void() call void @external_void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll index 516657b9560e1..fe9a6d8b32b89 100644 --- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll @@ -5,20 +5,19 @@ define amdgpu_kernel void @call_memory_arg_load(i32 addrspace(3)* %ptr, i32) #0 { ; GCN-LABEL: call_memory_arg_load: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s6, s[6:7], 0x0 -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 +; GCN-NEXT: s_load_dword s4, s[4:5], 0x0 
+; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: ds_read_b32 v0, v0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] ; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: s_endpgm %vgpr = load volatile i32, i32 addrspace(3)* %ptr call void @func(i32 %vgpr) @@ -29,21 +28,20 @@ define amdgpu_kernel void @call_memory_arg_load(i32 addrspace(3)* %ptr, i32) #0 define amdgpu_kernel void @call_memory_no_dep(i32 addrspace(1)* %ptr, i32) #0 { ; GCN-LABEL: call_memory_no_dep: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_addc_u32 s1, s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: global_store_dword v0, v0, s[6:7] -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] +; GCN-NEXT: global_store_dword v0, v0, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_endpgm store i32 0, i32 addrspace(1)* %ptr call void @func(i32 0) @@ -54,19 +52,18 @@ define amdgpu_kernel void @call_memory_no_dep(i32 addrspace(1)* %ptr, i32) #0 { define amdgpu_kernel void @call_no_wait_after_call(i32 addrspace(1)* %ptr, i32) #0 { ; GCN-LABEL: call_no_wait_after_call: ; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12 +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+12 ; GCN-NEXT: v_mov_b32_e32 v40, 0 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: global_store_dword v40, v40, s[34:35] ; GCN-NEXT: s_endpgm call void @func(i32 0) @@ -77,19 +74,18 @@ define amdgpu_kernel void @call_no_wait_after_call(i32 addrspace(1)* %ptr, i32) define amdgpu_kernel void @call_no_wait_after_call_return_val(i32 addrspace(1)* %ptr, i32) #0 { ; GCN-LABEL: call_no_wait_after_call_return_val: ; GCN: ; %bb.0: 
-; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_getpc_b64 s[8:9] -; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12 +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, func.return@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, func.return@rel32@hi+12 ; GCN-NEXT: v_mov_b32_e32 v40, 0 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: global_store_dword v40, v0, s[34:35] ; GCN-NEXT: s_endpgm %rv = call i32 @func.return(i32 0) @@ -101,19 +97,18 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(i32 addrspace(1)* define amdgpu_kernel void @call_got_load(i32 addrspace(1)* %ptr, i32) #0 { ; GCN-LABEL: call_got_load: ; GCN: ; %bb.0: -; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0 -; GCN-NEXT: s_add_u32 s0, s0, s11 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_getpc_b64 s[6:7] -; GCN-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4 -; GCN-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 -; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, got.func@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, got.func@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: s_endpgm call void @got.func(i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 05c8acb2169e7..5061677188c83 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -85,12 +85,13 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN-LABEL: {{^}}callee_with_stack_and_call: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt -; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-DAG: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], 2 ; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}} ; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}} ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} @@ -105,13 
+106,14 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]] ; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]] -; MUBUF: s_addk_i32 s32, 0xfc00{{$}} -; FLATSCR: s_add_i32 s32, s32, -16{{$}} -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 +; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; MUBUF: s_addk_i32 s32, 0xfc00{{$}} +; FLATSCR: s_add_i32 s32, s32, -16{{$}} +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -130,13 +132,15 @@ define void @callee_with_stack_and_call() #0 { ; GCN-LABEL: {{^}}callee_no_stack_with_call: ; GCN: s_waitcnt +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; MUBUF-DAG: s_addk_i32 s32, 0x400 ; FLATSCR-DAG: s_add_i32 s32, s32, 16 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]] +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], [[FP_SPILL_LANE:[0-9]+]] ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 @@ -145,13 +149,14 @@ define void @callee_with_stack_and_call() #0 { ; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0 ; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1 -; MUBUF: s_addk_i32 s32, 0xfc00 -; FLATSCR: s_add_i32 s32, s32, -16 -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]] +; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], [[FP_SPILL_LANE]] ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; MUBUF: s_addk_i32 s32, 0xfc00 +; FLATSCR: s_add_i32 s32, s32, -16 +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_no_stack_with_call() #0 { @@ -210,7 +215,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; GCN-LABEL: {{^}}spill_only_csr_sgpr: ; GCN: s_waitcnt -; GCN-NEXT: s_or_saveexec_b64 +; GCN-NEXT: s_xor_saveexec_b64 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, @@ -219,7 +224,7 @@ define void 
@callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; GCN-NEXT: ; clobber s42 ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: v_readlane_b32 s42, v0, 0 -; GCN-NEXT: s_or_saveexec_b64 +; GCN-NEXT: s_xor_saveexec_b64 ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, @@ -263,12 +268,12 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; Use a copy to a free SGPR instead of introducing a second CSR VGPR. ; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr: ; GCN: s_waitcnt -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 63 +; GCN-NEXT: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GCN-COUNT-63: v_writelane_b32 [[CSR_VGPR]] @@ -276,13 +281,13 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:4 ; GCN: ;;#ASMSTART +; GCN: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN-NEXT: v_readlane_b32 s33, v0, 63 -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; GCN-NEXT: s_mov_b32 s33, [[TMP_SGPR]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @last_lane_vgpr_for_fp_csr() #1 { @@ -304,14 +309,12 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; Use a copy to a free SGPR instead of introducing a second CSR VGPR. 
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr: ; GCN: s_waitcnt -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; FLATSCR: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 -; FLATSCR-NEXT: s_mov_b32 s33, s32 -; MUBUF: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 -; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; FLATSCR: s_add_i32 s32, s32, 16 ; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill @@ -323,13 +326,13 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v0 +; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 ; FLATSCR-NEXT: s_add_i32 s32, s32, -16 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @no_new_vgpr_for_fp_csr() #1 { @@ -376,12 +379,12 @@ define void @realign_stack_no_fp_elim() #1 { ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp: ; GCN: s_waitcnt -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-NEXT: s_mov_b32 vcc_lo, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; MUBUF: s_addk_i32 s32, 0x300 ; FLATSCR: s_add_i32 s32, s32, 12 @@ -393,13 +396,13 @@ define void @realign_stack_no_fp_elim() #1 { ; GCN: ;;#ASMSTART ; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 ; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 
offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; MUBUF: s_addk_i32 s32, 0xfd00 ; FLATSCR: s_add_i32 s32, s32, -12 -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; GCN-NEXT: s_mov_b32 s33, vcc_lo ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp() #1 { @@ -419,26 +422,28 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; Need a new CSR VGPR to satisfy the FP spill. ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr: ; GCN: s_waitcnt +; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill +; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 0 -; GCN-NEXT: s_mov_b32 s33, s32 -; MUBUF: s_addk_i32 s32, 0x300{{$}} -; FLATSCR: s_add_i32 s32, s32, 12{{$}} +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], [[FP_SCRATCH_COPY]], 0 ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword +; MUBUF: s_addk_i32 s32, 0x300{{$}} +; FLATSCR: s_add_i32 s32, s32, 12{{$}} ; GCN: ;;#ASMSTART -; MUBUF: s_addk_i32 s32, 0xfd00{{$}} -; FLATSCR: s_add_i32 s32, s32, -12{{$}} -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 0 +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSR_VGPR]], 0 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload +; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; MUBUF: s_addk_i32 s32, 0xfd00{{$}} +; FLATSCR: s_add_i32 s32, s32, -12{{$}} +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { @@ -465,29 +470,29 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; register is needed to access the CSR VGPR slot. 
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset: ; GCN: s_waitcnt +; GCN-NEXT: s_mov_b32 vcc_lo, s33 +; GCN-DAG: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100 +; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x1004 ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill -; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-DAG: s_mov_b32 s33, s32 ; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} ; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword ; GCN: ;;#ASMSTART -; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}} -; FLATSCR: s_addk_i32 s32, 0xeff4{{$}} -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 -; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100 ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004 +; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x1004 ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}} +; FLATSCR: s_addk_i32 s32, 0xeff4{{$}} +; GCN-NEXT: s_mov_b32 s33, vcc_lo ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 { @@ -521,7 +526,7 @@ define internal void @local_empty_func() #0 { ; An FP is needed, despite not needing any spills ; TODO: Ccould see callee does not use stack and omit FP. ; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: v_writelane_b32 v0, s33, 2 +; GCN: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33 ; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 @@ -530,7 +535,7 @@ define internal void @local_empty_func() #0 { ; GCN: s_swappc_b64 ; MUBUF: s_addk_i32 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN: v_readlane_b32 s33, v0, 2 +; GCN: s_mov_b32 s33, [[TMP_SGPR]] define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -540,14 +545,16 @@ define void @ipra_call_with_stack() #0 { ; With no free registers, we must spill the FP to memory. 
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory: -; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 -; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 ; 4-byte Folded Spill +; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 ; FLATSCR: s_mov_b32 s0, s33 ; GCN: s_mov_b32 s33, s32 -; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Reload -; FLATSCR: s_mov_b32 s33, s0 +; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], [[FP_SCRATCH_COPY]] +; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s33 ; 4-byte Folded Reload ; MUBUF: s_waitcnt vmcnt(0) -; MUBUF: v_readfirstlane_b32 s33, [[TMP_VGPR2]] +; MUBUF: v_readfirstlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[TMP_VGPR2]] +; MUBUF: s_mov_b32 s33, [[FP_SCRATCH_COPY]] +; FLATSCR: s_mov_b32 s33, s0 ; GCN: s_setpc_b64 ; MUBUF: ScratchSize: 8 ; FLATSCR: ScratchSize: 0 @@ -570,18 +577,19 @@ define void @callee_need_to_spill_fp_to_memory() #3 { ; need to spill the FP to memory if there are no free lanes in the reserved ; VGPR. ; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory_full_reserved_vgpr: -; MUBUF: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN: s_mov_b32 s33, s32 +; MUBUF: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF: s_mov_b64 exec, [[COPY_EXEC1]] -; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 -; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]] +; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], [[FP_SCRATCH_COPY]] +; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s33 offset:[[OFF:[0-9]+]] ; GCN-NOT: v_writelane_b32 v40, s33 -; MUBUF: s_mov_b32 s33, s32 -; FLATSCR: s_mov_b32 s33, s0 ; GCN-NOT: v_readlane_b32 s33, v40 -; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]] -; MUBUF: v_readfirstlane_b32 s33, [[TMP_VGPR2]] -; MUBUF: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s33 offset:[[OFF]] +; MUBUF: v_readfirstlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[TMP_VGPR2]] +; MUBUF: s_xor_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF: s_mov_b64 exec, [[COPY_EXEC2]] +; MUBUF: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs", @@ -609,14 +617,14 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; Make sure that the FP save happens after restoring exec from the same ; register. 
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_reg: -; GCN-NOT: v_writelane_b32 v40, s33 -; FLATSCR: s_or_saveexec_b64 s[0:1], -1 -; FLATSCR: s_mov_b64 exec, s[0:1] -; FLATSCR: s_mov_b32 s0, s33 +; FLATSCR: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 ; FLATSCR: s_mov_b32 s33, s32 -; FLATSCR: s_mov_b32 s33, s0 -; FLATSCR: s_or_saveexec_b64 s[0:1], -1 +; GCN-NOT: v_writelane_b32 v40, s33 +; FLATSCR: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; FLATSCR: s_mov_b64 exec, [[COPY_EXEC0]] +; FLATSCR: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NOT: v_readlane_b32 s33, v40 +; FLATSCR: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 define void @callee_need_to_spill_fp_to_reg() #1 { call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs", @@ -642,12 +650,14 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; If the size of the offset exceeds the MUBUF offset field we need another ; scratch VGPR to hold the offset. ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset -; MUBUF: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 +; MUBUF: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; MUBUF-NEXT: s_mov_b32 s33, s32 +; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40100 ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill -; MUBUF: v_mov_b32_e32 v0, s33 +; MUBUF: v_mov_b32_e32 v0, [[FP_SCRATCH_COPY]] ; GCN-NOT: v_mov_b32_e32 v0, 0x100c -; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s33, 0x40200 ; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; FLATSCR: v_mov_b32_e32 v0, 0 ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1000 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll index 3f0c627641ac9..a0a27c37b457e 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -591,3 +591,6 @@ declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind noinline } attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll index eb4ed8b2acef3..176d0f7939e83 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -419,15 +419,15 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; Requires loading and storing to stack slot. 
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: ; GCN-DAG: s_addk_i32 s32, 0x400{{$}} -; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_load_dword [[TMP_REG:v[0-9]+]], off, s[0:3], s33{{$}} ; GCN: buffer_store_dword [[TMP_REG]], off, s[0:3], s32{{$}} ; GCN: s_swappc_b64 +; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_addk_i32 s32, 0xfc00{{$}} -; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index c63830a00a010..fd798edcc3200 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt @@ -425,15 +425,15 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; Requires loading and storing to stack slot. ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: ; GCN-DAG: s_addk_i32 s32, 0x400{{$}} -; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: s_swappc_b64 +; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_addk_i32 s32, 0xfc00{{$}} -; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, @@ -550,6 +550,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 } ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: + ; FIXED-ABI-NOT: v31 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}} ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}} @@ -802,3 +803,6 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind noinline } attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll index 3c23aa5cb5c1b..a65a41b452e50 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -68,80 +68,76 @@ entry: define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 { ; GFX803-LABEL: test_kern_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_i32 s12, s12, s17 -; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GFX803-NEXT: s_add_i32 
s10, s10, s15 +; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX803-NEXT: s_add_u32 s0, s0, s17 +; GFX803-NEXT: s_add_u32 s0, s0, s15 ; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 -; GFX803-NEXT: s_mov_b32 s13, s15 -; GFX803-NEXT: s_mov_b32 s12, s14 +; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX803-NEXT: v_or_b32_e32 v31, v0, v2 -; GFX803-NEXT: s_mov_b32 s14, s16 +; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX803-NEXT: s_mov_b32 s32, 0 -; GFX803-NEXT: s_getpc_b64 s[18:19] -; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX803-NEXT: s_getpc_b64 s[16:17] +; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX803-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_kern_call: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GFX900-NEXT: s_add_u32 s0, s0, s17 +; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX900-NEXT: s_add_u32 s0, s0, s15 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX900-NEXT: s_mov_b32 s32, 0 -; GFX900-NEXT: s_getpc_b64 s[18:19] -; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_kern_call: ; GFX1010: ; %bb.0: ; %entry -; GFX1010-NEXT: s_add_u32 s12, s12, s17 +; GFX1010-NEXT: s_add_u32 s10, s10, s15 ; GFX1010-NEXT: s_mov_b32 s32, 0 -; GFX1010-NEXT: s_addc_u32 s13, s13, 0 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 +; GFX1010-NEXT: s_addc_u32 s11, s11, 0 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX1010-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX1010-NEXT: s_add_u32 s0, s0, s17 +; GFX1010-NEXT: s_add_u32 s0, s0, s15 ; GFX1010-NEXT: s_addc_u32 s1, s1, 0 -; GFX1010-NEXT: s_mov_b32 s13, s15 -; GFX1010-NEXT: s_mov_b32 s12, s14 +; GFX1010-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX1010-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1010-NEXT: s_mov_b32 s14, s16 -; GFX1010-NEXT: s_getpc_b64 s[18:19] -; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX1010-NEXT: s_getpc_b64 s[16:17] +; GFX1010-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX1010-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX1010-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1010-NEXT: s_endpgm ; ; GFX1100-LABEL: 
test_kern_call: ; GFX1100: ; %bb.0: ; %entry ; GFX1100-NEXT: v_mov_b32_e32 v31, v0 ; GFX1100-NEXT: s_mov_b32 s12, s13 -; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1100-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX1100-NEXT: s_mov_b32 s13, s14 ; GFX1100-NEXT: s_mov_b32 s14, s15 ; GFX1100-NEXT: s_mov_b32 s32, 0 -; GFX1100-NEXT: s_getpc_b64 s[16:17] -; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 -; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1100-NEXT: s_getpc_b64 s[6:7] +; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4 +; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12 +; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX1100-NEXT: s_endpgm entry: @@ -152,72 +148,69 @@ entry: define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 { ; GFX803-LABEL: test_kern_stack_and_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_i32 s12, s12, s17 -; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GFX803-NEXT: s_add_i32 s10, s10, s15 +; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX803-NEXT: s_add_u32 s0, s0, s17 +; GFX803-NEXT: s_add_u32 s0, s0, s15 ; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 -; GFX803-NEXT: s_mov_b32 s13, s15 -; GFX803-NEXT: s_mov_b32 s12, s14 +; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX803-NEXT: v_mov_b32_e32 v3, 0 ; GFX803-NEXT: v_or_b32_e32 v31, v0, v2 -; GFX803-NEXT: s_mov_b32 s14, s16 +; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX803-NEXT: s_movk_i32 s32, 0x400 ; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; GFX803-NEXT: s_waitcnt vmcnt(0) -; GFX803-NEXT: s_getpc_b64 s[18:19] -; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX803-NEXT: s_getpc_b64 s[16:17] +; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX803-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_kern_stack_and_call: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GFX900-NEXT: s_add_u32 s0, s0, s17 +; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX900-NEXT: s_add_u32 s0, s0, s15 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX900-NEXT: s_movk_i32 s32, 0x400 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: s_getpc_b64 s[18:19] -; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, 
s17, ex@rel32@hi+12 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_kern_stack_and_call: ; GFX1010: ; %bb.0: ; %entry -; GFX1010-NEXT: s_add_u32 s12, s12, s17 +; GFX1010-NEXT: s_add_u32 s10, s10, s15 ; GFX1010-NEXT: s_movk_i32 s32, 0x200 -; GFX1010-NEXT: s_addc_u32 s13, s13, 0 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 +; GFX1010-NEXT: s_addc_u32 s11, s11, 0 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX1010-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1010-NEXT: v_mov_b32_e32 v3, 0 -; GFX1010-NEXT: s_add_u32 s0, s0, s17 +; GFX1010-NEXT: s_add_u32 s0, s0, s15 ; GFX1010-NEXT: s_addc_u32 s1, s1, 0 -; GFX1010-NEXT: s_mov_b32 s13, s15 +; GFX1010-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1010-NEXT: s_mov_b32 s12, s14 -; GFX1010-NEXT: s_mov_b32 s14, s16 +; GFX1010-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1010-NEXT: s_getpc_b64 s[18:19] -; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX1010-NEXT: s_getpc_b64 s[16:17] +; GFX1010-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX1010-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX1010-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1010-NEXT: s_endpgm ; ; GFX1100-LABEL: test_kern_stack_and_call: @@ -225,19 +218,18 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 { ; GFX1100-NEXT: v_mov_b32_e32 v1, 0 ; GFX1100-NEXT: v_mov_b32_e32 v31, v0 ; GFX1100-NEXT: s_mov_b32 s12, s13 -; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1100-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX1100-NEXT: s_mov_b32 s13, s14 ; GFX1100-NEXT: s_mov_b32 s14, s15 ; GFX1100-NEXT: s_mov_b32 s32, 16 ; GFX1100-NEXT: scratch_store_b32 off, v1, off offset:4 dlc ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1100-NEXT: s_getpc_b64 s[16:17] -; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 -; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1100-NEXT: s_getpc_b64 s[6:7] +; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4 +; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12 +; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm @@ -321,84 +313,80 @@ entry: define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_i32 s12, s12, s17 -; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GFX803-NEXT: s_add_i32 s10, s10, s15 +; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX803-NEXT: s_add_u32 s0, s0, s17 +; GFX803-NEXT: s_add_u32 s0, s0, s15 ; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 -; GFX803-NEXT: s_mov_b32 s13, s15 -; GFX803-NEXT: s_mov_b32 s12, s14 +; GFX803-NEXT: s_mov_b64 
s[10:11], s[8:9] ; GFX803-NEXT: v_or_b32_e32 v31, v0, v2 -; GFX803-NEXT: s_mov_b32 s14, s16 +; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX803-NEXT: s_mov_b32 s32, 0 ; GFX803-NEXT: s_mov_b32 s33, 0 -; GFX803-NEXT: s_getpc_b64 s[18:19] -; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX803-NEXT: s_getpc_b64 s[16:17] +; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX803-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_force_fp_kern_call: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GFX900-NEXT: s_add_u32 s0, s0, s17 +; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX900-NEXT: s_add_u32 s0, s0, s15 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX900-NEXT: s_mov_b32 s32, 0 ; GFX900-NEXT: s_mov_b32 s33, 0 -; GFX900-NEXT: s_getpc_b64 s[18:19] -; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_force_fp_kern_call: ; GFX1010: ; %bb.0: ; %entry -; GFX1010-NEXT: s_add_u32 s12, s12, s17 +; GFX1010-NEXT: s_add_u32 s10, s10, s15 ; GFX1010-NEXT: s_mov_b32 s32, 0 ; GFX1010-NEXT: s_mov_b32 s33, 0 -; GFX1010-NEXT: s_addc_u32 s13, s13, 0 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 +; GFX1010-NEXT: s_addc_u32 s11, s11, 0 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX1010-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX1010-NEXT: s_add_u32 s0, s0, s17 +; GFX1010-NEXT: s_add_u32 s0, s0, s15 ; GFX1010-NEXT: s_addc_u32 s1, s1, 0 -; GFX1010-NEXT: s_mov_b32 s13, s15 -; GFX1010-NEXT: s_mov_b32 s12, s14 +; GFX1010-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX1010-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1010-NEXT: s_mov_b32 s14, s16 -; GFX1010-NEXT: s_getpc_b64 s[18:19] -; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX1010-NEXT: s_getpc_b64 s[16:17] +; GFX1010-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX1010-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX1010-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1010-NEXT: s_endpgm ; ; GFX1100-LABEL: test_force_fp_kern_call: ; GFX1100: ; %bb.0: ; %entry ; GFX1100-NEXT: v_mov_b32_e32 v31, v0 ; GFX1100-NEXT: s_mov_b32 s12, s13 -; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1100-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1100-NEXT: s_mov_b64 s[8:9], 
s[2:3] ; GFX1100-NEXT: s_mov_b32 s13, s14 ; GFX1100-NEXT: s_mov_b32 s14, s15 ; GFX1100-NEXT: s_mov_b32 s32, 0 ; GFX1100-NEXT: s_mov_b32 s33, 0 -; GFX1100-NEXT: s_getpc_b64 s[16:17] -; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 -; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1100-NEXT: s_getpc_b64 s[6:7] +; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4 +; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12 +; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX1100-NEXT: s_endpgm ; GFX1010-NEXT s_add_u32 s12, s12, s17 ; GFX1010-NEXT s_mov_b32 s32, 0 @@ -427,75 +415,72 @@ entry: define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_stack_and_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_i32 s12, s12, s17 -; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GFX803-NEXT: s_add_i32 s10, s10, s15 +; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 ; GFX803-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX803-NEXT: s_add_u32 s0, s0, s17 +; GFX803-NEXT: s_add_u32 s0, s0, s15 ; GFX803-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX803-NEXT: s_mov_b32 s33, 0 -; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 -; GFX803-NEXT: s_mov_b32 s13, s15 -; GFX803-NEXT: s_mov_b32 s12, s14 +; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX803-NEXT: v_mov_b32_e32 v3, 0 ; GFX803-NEXT: v_or_b32_e32 v31, v0, v2 -; GFX803-NEXT: s_mov_b32 s14, s16 +; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX803-NEXT: s_movk_i32 s32, 0x400 ; GFX803-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; GFX803-NEXT: s_waitcnt vmcnt(0) -; GFX803-NEXT: s_getpc_b64 s[18:19] -; GFX803-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX803-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX803-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX803-NEXT: s_getpc_b64 s[16:17] +; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX803-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_force_fp_kern_stack_and_call: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GFX900-NEXT: s_add_u32 s0, s0, s17 +; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX900-NEXT: s_add_u32 s0, s0, s15 ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX900-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX900-NEXT: s_mov_b32 s33, 0 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX900-NEXT: s_movk_i32 s32, 0x400 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: s_getpc_b64 s[18:19] -; GFX900-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX900-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX900-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_force_fp_kern_stack_and_call: ; GFX1010: ; %bb.0: ; %entry -; 
GFX1010-NEXT: s_add_u32 s12, s12, s17 +; GFX1010-NEXT: s_add_u32 s10, s10, s15 ; GFX1010-NEXT: s_movk_i32 s32, 0x200 ; GFX1010-NEXT: s_mov_b32 s33, 0 -; GFX1010-NEXT: s_addc_u32 s13, s13, 0 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 -; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 +; GFX1010-NEXT: s_addc_u32 s11, s11, 0 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10 +; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11 ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX1010-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1010-NEXT: v_mov_b32_e32 v3, 0 -; GFX1010-NEXT: s_add_u32 s0, s0, s17 +; GFX1010-NEXT: s_add_u32 s0, s0, s15 ; GFX1010-NEXT: s_addc_u32 s1, s1, 0 -; GFX1010-NEXT: s_mov_b32 s13, s15 +; GFX1010-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1010-NEXT: s_mov_b32 s12, s14 -; GFX1010-NEXT: s_mov_b32 s14, s16 +; GFX1010-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1010-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; GFX1010-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1010-NEXT: s_getpc_b64 s[18:19] -; GFX1010-NEXT: s_add_u32 s18, s18, ex@rel32@lo+4 -; GFX1010-NEXT: s_addc_u32 s19, s19, ex@rel32@hi+12 -; GFX1010-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX1010-NEXT: s_getpc_b64 s[16:17] +; GFX1010-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX1010-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX1010-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1010-NEXT: s_endpgm ; ; GFX1100-LABEL: test_force_fp_kern_stack_and_call: @@ -504,19 +489,18 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add ; GFX1100-NEXT: v_mov_b32_e32 v31, v0 ; GFX1100-NEXT: s_mov_b32 s33, 0 ; GFX1100-NEXT: s_mov_b32 s12, s13 -; GFX1100-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX1100-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX1100-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX1100-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX1100-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX1100-NEXT: s_mov_b32 s13, s14 ; GFX1100-NEXT: s_mov_b32 s14, s15 ; GFX1100-NEXT: s_mov_b32 s32, 16 ; GFX1100-NEXT: scratch_store_b32 off, v1, s33 offset:4 dlc ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1100-NEXT: s_getpc_b64 s[16:17] -; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 -; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 -; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX1100-NEXT: s_getpc_b64 s[6:7] +; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4 +; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12 +; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index 5077ddf894c31..0a4b8b5cc3791 100644 --- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -30,22 +30,43 @@ define amdgpu_kernel void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind ; ; GCN_DBG-LABEL: test_loop: ; GCN_DBG: ; %bb.0: ; %entry +; GCN_DBG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN_DBG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN_DBG-NEXT: s_mov_b32 s10, -1 +; GCN_DBG-NEXT: s_mov_b32 s11, 0xe8f000 +; GCN_DBG-NEXT: s_add_u32 s8, s8, s3 +; GCN_DBG-NEXT: s_addc_u32 s9, s9, 0 +; GCN_DBG-NEXT: ; implicit-def: $vgpr0 ; GCN_DBG-NEXT: s_load_dword s2, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v2, s2, 0 +; GCN_DBG-NEXT: v_writelane_b32 v0, s2, 0 ; GCN_DBG-NEXT: 
s_load_dword s1, s[0:1], 0xa ; GCN_DBG-NEXT: s_mov_b32 s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s2, -1 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: s_cmp_lg_u32 s1, s2 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_mov_b64 s[4:5], exec +; GCN_DBG-NEXT: s_mov_b64 exec, -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN_DBG-NEXT: ; %bb.1: ; %for.exit +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB0_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 @@ -60,13 +81,25 @@ define amdgpu_kernel void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 ; GCN_DBG-NEXT: ds_write_b32 v0, v1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB0_2 ; GCN_DBG-NEXT: ; %bb.3: ; %DummyReturnBlock +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm entry: %cmp = icmp eq i32 %n, -1 @@ -105,18 +138,38 @@ define amdgpu_kernel void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nou ; ; GCN_DBG-LABEL: loop_const_true: ; GCN_DBG: ; %bb.0: ; %entry +; GCN_DBG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN_DBG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN_DBG-NEXT: s_mov_b32 s10, -1 +; GCN_DBG-NEXT: s_mov_b32 s11, 0xe8f000 +; GCN_DBG-NEXT: s_add_u32 s8, s8, s3 +; GCN_DBG-NEXT: s_addc_u32 s9, s9, 0 +; GCN_DBG-NEXT: ; implicit-def: $vgpr0 ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; 
GCN_DBG-NEXT: s_branch .LBB1_2 ; GCN_DBG-NEXT: .LBB1_1: ; %for.exit +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB1_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 @@ -131,11 +184,18 @@ define amdgpu_kernel void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nou ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 ; GCN_DBG-NEXT: ds_write_b32 v0, v1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], 0 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB1_1 ; GCN_DBG-NEXT: s_branch .LBB1_2 entry: @@ -170,18 +230,38 @@ define amdgpu_kernel void @loop_const_false(float addrspace(3)* %ptr, i32 %n) no ; ; GCN_DBG-LABEL: loop_const_false: ; GCN_DBG: ; %bb.0: ; %entry +; GCN_DBG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN_DBG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN_DBG-NEXT: s_mov_b32 s10, -1 +; GCN_DBG-NEXT: s_mov_b32 s11, 0xe8f000 +; GCN_DBG-NEXT: s_add_u32 s8, s8, s3 +; GCN_DBG-NEXT: s_addc_u32 s9, s9, 0 +; GCN_DBG-NEXT: ; implicit-def: $vgpr0 ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_branch .LBB2_2 ; GCN_DBG-NEXT: .LBB2_1: ; %for.exit +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB2_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; 
GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 @@ -196,11 +276,18 @@ define amdgpu_kernel void @loop_const_false(float addrspace(3)* %ptr, i32 %n) no ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 ; GCN_DBG-NEXT: ds_write_b32 v0, v1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB2_1 ; GCN_DBG-NEXT: s_branch .LBB2_2 entry: @@ -236,18 +323,38 @@ define amdgpu_kernel void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) no ; ; GCN_DBG-LABEL: loop_const_undef: ; GCN_DBG: ; %bb.0: ; %entry +; GCN_DBG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN_DBG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN_DBG-NEXT: s_mov_b32 s10, -1 +; GCN_DBG-NEXT: s_mov_b32 s11, 0xe8f000 +; GCN_DBG-NEXT: s_add_u32 s8, s8, s3 +; GCN_DBG-NEXT: s_addc_u32 s9, s9, 0 +; GCN_DBG-NEXT: ; implicit-def: $vgpr0 ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_branch .LBB3_2 ; GCN_DBG-NEXT: .LBB3_1: ; %for.exit +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB3_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 @@ -262,9 +369,16 @@ define amdgpu_kernel void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) no ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 ; GCN_DBG-NEXT: ds_write_b32 v0, v1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; 
GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB3_1 ; GCN_DBG-NEXT: s_branch .LBB3_2 entry: @@ -314,32 +428,60 @@ define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind ; ; GCN_DBG-LABEL: loop_arg_0: ; GCN_DBG: ; %bb.0: ; %entry +; GCN_DBG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN_DBG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN_DBG-NEXT: s_mov_b32 s10, -1 +; GCN_DBG-NEXT: s_mov_b32 s11, 0xe8f000 +; GCN_DBG-NEXT: s_add_u32 s8, s8, s3 +; GCN_DBG-NEXT: s_addc_u32 s9, s9, 0 +; GCN_DBG-NEXT: ; implicit-def: $vgpr0 ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] +; GCN_DBG-NEXT: s_waitcnt expcnt(0) ; GCN_DBG-NEXT: v_mov_b32_e32 v0, 0 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: ds_read_u8 v0, v0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_and_b32 s0, 1, s0 ; GCN_DBG-NEXT: s_cmp_eq_u32 s0, 1 ; GCN_DBG-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 -; GCN_DBG-NEXT: v_writelane_b32 v2, s1, 2 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v0, s1, 2 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_branch .LBB4_2 ; GCN_DBG-NEXT: .LBB4_1: ; %for.exit +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] +; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB4_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 3 -; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 1 -; GCN_DBG-NEXT: v_readlane_b32 s3, v2, 2 -; GCN_DBG-NEXT: v_readlane_b32 s4, v2, 0 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: s_waitcnt expcnt(0) +; GCN_DBG-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 3 +; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 1 +; GCN_DBG-NEXT: v_readlane_b32 s3, v0, 2 +; GCN_DBG-NEXT: v_readlane_b32 s4, v0, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s4 @@ -354,10 +496,17 @@ define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind ; GCN_DBG-NEXT: s_mov_b32 m0, -1 ; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 ; GCN_DBG-NEXT: ds_write_b32 v0, v1 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: buffer_load_dword 
v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 +; GCN_DBG-NEXT: s_waitcnt vmcnt(0) +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_DBG-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB4_1 ; GCN_DBG-NEXT: s_branch .LBB4_2 entry: diff --git a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir index aaf342fcd2ae1..2d8be30aae67a 100644 --- a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir @@ -17,13 +17,13 @@ body: | ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF1]] - ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[COPY]], implicit-def $vcc_lo, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[DEF3]] - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[DEF1]] + ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[PRED_COPY]], implicit-def $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[DEF3]] + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[PRED_COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]] - ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_2]] + ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[PRED_COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec %0:sreg_32 = S_MOV_B32 681 %1:sreg_32 = IMPLICIT_DEF %2:sreg_32 = IMPLICIT_DEF @@ -54,11 +54,11 @@ body: | ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF2]], [[DEF]], implicit-def $vcc_lo, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF4]] - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[DEF4]] + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[PRED_COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec ; GCN-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF5]] - ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: 
[[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF5]] + ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[PRED_COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:sreg_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir index c77ce31b12607..10bd3f81496e2 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir @@ -44,7 +44,7 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64 = PRED_COPY [[COPY1]] ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index f81c46ee2439b..2077b0f87b485 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -19,14 +19,14 @@ ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -84,14 +84,14 @@ bb.outer.end: ; preds = %bb.outer.then, %bb. 
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -161,7 +161,7 @@ bb.outer.end: ; preds = %bb.inner.then, %bb ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -170,6 +170,9 @@ bb.outer.end: ; preds = %bb.inner.then, %bb ; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]] ; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]] @@ -180,7 +183,7 @@ bb.outer.end: ; preds = %bb.inner.then, %bb ; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}] +; GCN-O0-DAG: s_xor_b64 exec, exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: ; GCN-O0: store_dword @@ -260,6 +263,9 @@ bb.outer.end: ; preds = %bb, %bb.then, %b ; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[VGPR]] +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]] ; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]] @@ -270,14 +276,14 @@ bb.outer.end: ; preds = %bb, %bb.then, %b ; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}] +; GCN-O0-DAG: s_xor_b64 exec, exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz 
[[ENDIF_OUTER:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: ; GCN-O0: store_dword ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -287,7 +293,7 @@ bb.outer.end: ; preds = %bb, %bb.then, %b ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -357,7 +363,7 @@ bb.outer.end: ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec ; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]] -; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}] +; GCN-O0-DAG: s_and_b64 s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: @@ -419,61 +425,87 @@ bb.end: ; preds = %bb.then, %bb ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]: -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] -; GCN-O0: buffer_load_dword -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0: buffer_load_dword [[RESTORED_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] +; GCN-O0: buffer_load_dword +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]] +; GCN-O0-DAG: 
v_writelane_b32 [[RESTORED_VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[RESTORED_VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] +; GCN-O0: buffer_load_dword [[RESTORED_1_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 ; GCN-O0: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] ; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_1_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[RESTORED_1_VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]] ; GCN-O0-NEXT: ; %bb.{{[0-9]+}}: -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]] +; GCN-O0: buffer_load_dword [[RESTORED_2_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_2_VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_2_VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]] ; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}] ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_2_VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_2_VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[RESTORED_2_VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]] ; GCN-O0: {{^}}[[FLOW2]]: -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]] -; 
GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]] +; GCN-O0: buffer_load_dword [[RESTORED_3_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_3_VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_3_VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]] ; GCN-O0: s_branch [[FLOW:.LBB[0-9_]+]] ; GCN-O0: {{^}}[[FLOW]]: ; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_3_VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_3_VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[RESTORED_3_VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]] ; GCN-O0: ; %bb.{{[0-9]+}}: -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0: buffer_load_dword [[RESTORED_4_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_4_VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_4_VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]] +; GCN-O0-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; GCN-O0-NEXT: buffer_store_dword [[RESTORED_4_VGPR]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 +; GCN-O0-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; GCN-O0: {{^}}[[FLOW3]]: +; GCN-O0: buffer_load_dword [[RESTORED_5_VGPR:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:68 ; GCN-O0-COUNT-4: buffer_load_dword -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_5_VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_5_VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_5_VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[RESTORED_5_VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]] ; GCN-O0: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN-O0-COUNT-2: s_mov_b64 -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] -; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_5_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_writelane_b32 
[[RESTORED_5_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_5_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] +; GCN-O0-DAG: v_writelane_b32 [[RESTORED_5_VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] ; GCN-O0-COUNT-4: buffer_store_dword ; GCN-O0: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] ; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]] diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index a8b97c7932580..382eb4b6493d3 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -12,16 +12,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -30,7 +30,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: DBG_VALUE ; GCN-NEXT: S_ENDPGM 0 bb.0: @@ -67,16 +67,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -89,7 +89,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: 
$exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -123,16 +123,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -146,7 +146,7 @@ body: | ; GCN-NEXT: DBG_VALUE ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -183,16 +183,16 @@ body: | ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -208,7 +208,7 @@ body: | ; GCN-NEXT: KILL [[DEF]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -247,16 +247,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; 
GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -270,10 +270,10 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[S_BREV_B32_:%[0-9]+]]:sgpr_32 = S_BREV_B32 [[DEF]] ; GCN-NEXT: KILL [[DEF]] - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[S_BREV_B32_]] + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY [[S_BREV_B32_]] ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -311,16 +311,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -331,11 +331,11 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: [[S_BREV_B64_:%[0-9]+]]:sreg_64 = S_BREV_B64 $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -370,8 +370,8 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -379,8 +379,8 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -391,11 +391,11 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub2 + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY1]], implicit-def $scc + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -430,16 +430,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} @@ -453,7 +453,7 @@ body: | ; GCN-NEXT: S_BRANCH %bb.4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -530,9 +530,9 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, 
implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} @@ -552,8 +552,8 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -564,7 +564,7 @@ body: | ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY1]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: ; GCN-NEXT: $exec = S_OR_B64 $exec, [[S_AND_B64_1]], implicit-def $scc @@ -616,8 +616,8 @@ body: | ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} @@ -629,7 +629,7 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.4(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) @@ -643,9 +643,9 @@ body: | ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.0(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[COPY1]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY1]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: S_BRANCH %bb.0 @@ -687,16 +687,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec 
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} @@ -707,7 +707,7 @@ body: | ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: %bb.5(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: ; GCN-NEXT: S_ENDPGM 0 @@ -750,16 +750,16 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN-NEXT: {{ $}} @@ -774,7 +774,7 @@ body: | ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.6(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.6: ; GCN-NEXT: successors: %bb.4(0x80000000) @@ -825,8 +825,8 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF]], implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -836,8 +836,8 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: 
[[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF1]], implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], killed [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 @@ -847,8 +847,8 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF2]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY2]], killed [[V_CMP_EQ_U32_e64_2]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_2]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 @@ -858,8 +858,8 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF3]], implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], killed [[V_CMP_EQ_U32_e64_3]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_3]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; GCN-NEXT: S_BRANCH %bb.4 @@ -872,7 +872,7 @@ body: | ; GCN-NEXT: bb.7: ; GCN-NEXT: successors: %bb.8(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY2]], implicit-def $scc ; GCN-NEXT: S_BRANCH %bb.8 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.8: @@ -885,9 +885,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 0, killed [[DEF4]], implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY4]], killed [[V_CMP_EQ_U32_e64_4]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_4]], [[PRED_COPY4]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_4]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; GCN-NEXT: S_BRANCH %bb.11 @@ -912,7 +912,7 @@ body: | ; GCN-NEXT: S_BRANCH %bb.10 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.14: - ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], 
implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[PRED_COPY]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.14 diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 7891cded195d5..5dd40aefbe146 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn --amdhsa-code-object-version=2 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn --amdhsa-code-object-version=2 -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s ; Verify registers used for tracking exec mask changes when all ; registers are spilled at the end of the block. The SGPR spill @@ -10,7 +10,7 @@ ; GCN-LABEL: {{^}}divergent_if_endif: -; VGPR: workitem_private_segment_byte_size = 12{{$}} +; VGPR: workitem_private_segment_byte_size = 16{{$}} ; GCN: {{^}}; %bb.0: @@ -82,7 +82,7 @@ endif: } ; GCN-LABEL: {{^}}divergent_loop: -; VGPR: workitem_private_segment_byte_size = 16{{$}} +; VGPR: workitem_private_segment_byte_size = 20{{$}} ; GCN: {{^}}; %bb.0: ; GCN-DAG: s_mov_b32 m0, -1 @@ -270,3 +270,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir new file mode 100644 index 0000000000000..7efdbcba81a97 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir @@ -0,0 +1,417 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX908 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -start-before=prologepilog %s -o - | FileCheck --check-prefix=GFX90A %s + +--- | + + define amdgpu_kernel void @test_spill() #0 { + ; GFX908-LABEL: test_spill: + ; GFX908: ; %bb.0: + ; GFX908-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 + ; GFX908-NEXT: ; implicit-def: $vgpr0 + ; GFX908-NEXT: ; implicit-def: $vgpr1 + ; GFX908-NEXT: ; implicit-def: $vgpr2 + ; GFX908-NEXT: ; implicit-def: $vgpr3 + ; GFX908-NEXT: ; implicit-def: $vgpr4 + ; GFX908-NEXT: ; implicit-def: $vgpr5 + ; GFX908-NEXT: ; implicit-def: $vgpr6 + ; GFX908-NEXT: ; implicit-def: $vgpr7 + ; GFX908-NEXT: ; implicit-def: $vgpr8 + ; GFX908-NEXT: ; implicit-def: $vgpr9 + ; GFX908-NEXT: ; implicit-def: $vgpr10 + ; GFX908-NEXT: ; implicit-def: $vgpr11 + ; GFX908-NEXT: ; implicit-def: $vgpr12 + ; GFX908-NEXT: ; implicit-def: $vgpr13 + ; 
GFX908-NEXT: ; implicit-def: $vgpr14 + ; GFX908-NEXT: ; implicit-def: $vgpr15 + ; GFX908-NEXT: ; implicit-def: $vgpr16 + ; GFX908-NEXT: ; implicit-def: $vgpr17 + ; GFX908-NEXT: ; implicit-def: $vgpr18 + ; GFX908-NEXT: ; implicit-def: $vgpr19 + ; GFX908-NEXT: ; implicit-def: $vgpr20 + ; GFX908-NEXT: ; implicit-def: $vgpr21 + ; GFX908-NEXT: ; implicit-def: $vgpr22 + ; GFX908-NEXT: ; implicit-def: $vgpr23 + ; GFX908-NEXT: ; implicit-def: $vgpr24 + ; GFX908-NEXT: ; implicit-def: $vgpr25 + ; GFX908-NEXT: ; implicit-def: $vgpr26 + ; GFX908-NEXT: ; implicit-def: $vgpr27 + ; GFX908-NEXT: ; implicit-def: $vgpr28 + ; GFX908-NEXT: ; implicit-def: $vgpr29 + ; GFX908-NEXT: ; implicit-def: $vgpr30 + ; GFX908-NEXT: ; implicit-def: $vgpr31 + ; GFX908-NEXT: ; implicit-def: $vgpr32 + ; GFX908-NEXT: ; implicit-def: $vgpr33 + ; GFX908-NEXT: ; implicit-def: $vgpr34 + ; GFX908-NEXT: ; implicit-def: $vgpr35 + ; GFX908-NEXT: ; implicit-def: $vgpr36 + ; GFX908-NEXT: ; implicit-def: $vgpr37 + ; GFX908-NEXT: ; implicit-def: $vgpr38 + ; GFX908-NEXT: ; implicit-def: $vgpr39 + ; GFX908-NEXT: ; implicit-def: $vgpr40 + ; GFX908-NEXT: ; implicit-def: $vgpr41 + ; GFX908-NEXT: ; implicit-def: $vgpr42 + ; GFX908-NEXT: ; implicit-def: $vgpr43 + ; GFX908-NEXT: ; implicit-def: $vgpr44 + ; GFX908-NEXT: ; implicit-def: $vgpr45 + ; GFX908-NEXT: ; implicit-def: $vgpr46 + ; GFX908-NEXT: ; implicit-def: $vgpr47 + ; GFX908-NEXT: ; implicit-def: $vgpr48 + ; GFX908-NEXT: ; implicit-def: $vgpr49 + ; GFX908-NEXT: ; implicit-def: $vgpr50 + ; GFX908-NEXT: ; implicit-def: $vgpr51 + ; GFX908-NEXT: ; implicit-def: $vgpr52 + ; GFX908-NEXT: ; implicit-def: $vgpr53 + ; GFX908-NEXT: ; implicit-def: $vgpr54 + ; GFX908-NEXT: ; implicit-def: $vgpr55 + ; GFX908-NEXT: ; implicit-def: $vgpr56 + ; GFX908-NEXT: ; implicit-def: $vgpr57 + ; GFX908-NEXT: ; implicit-def: $vgpr58 + ; GFX908-NEXT: ; implicit-def: $vgpr59 + ; GFX908-NEXT: ; implicit-def: $vgpr60 + ; GFX908-NEXT: ; implicit-def: $vgpr61 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a64, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a65, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a66, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a67, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a68, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a69, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a70, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a71, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a72, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a73, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a74, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a75, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a76, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a77, v63 + 
; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a78, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: v_accvgpr_write_b32 a79, v63 + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX908-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 0 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a96 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a97 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a98 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a99 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: 
buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a100 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a101 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a102 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a103 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a104 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a105 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a106 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a107 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a108 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a109 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill + ; GFX908-NEXT: v_accvgpr_read_b32 v63, a111 ; Reload Reuse + ; GFX908-NEXT: s_nop 1 + ; GFX908-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill + ; + ; GFX90A-LABEL: test_spill: + ; GFX90A: ; %bb.0: + ; GFX90A-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 + ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; GFX90A-NEXT: v_accvgpr_mov_b32 a64, a96 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a65, a97 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a66, a98 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a67, a99 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a68, a100 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a69, a101 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a70, a102 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a71, a103 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a72, a104 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a73, a105 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a74, a106 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a75, a107 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a76, a108 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a77, a109 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a78, a110 + ; GFX90A-NEXT: v_accvgpr_mov_b32 a79, a111 + ; GFX90A-NEXT: v_accvgpr_read_b32 v77, a96 ; Reload Reuse + ; GFX90A-NEXT: ; implicit-def: $vgpr0 + ; GFX90A-NEXT: ; implicit-def: $vgpr1 + ; GFX90A-NEXT: ; implicit-def: $vgpr2 + ; GFX90A-NEXT: ; implicit-def: $vgpr3 + ; GFX90A-NEXT: ; implicit-def: $vgpr4 + ; GFX90A-NEXT: ; implicit-def: $vgpr5 + ; GFX90A-NEXT: ; implicit-def: $vgpr6 + ; GFX90A-NEXT: ; implicit-def: 
$vgpr7 + ; GFX90A-NEXT: ; implicit-def: $vgpr8 + ; GFX90A-NEXT: ; implicit-def: $vgpr9 + ; GFX90A-NEXT: ; implicit-def: $vgpr10 + ; GFX90A-NEXT: ; implicit-def: $vgpr11 + ; GFX90A-NEXT: ; implicit-def: $vgpr12 + ; GFX90A-NEXT: ; implicit-def: $vgpr13 + ; GFX90A-NEXT: ; implicit-def: $vgpr14 + ; GFX90A-NEXT: ; implicit-def: $vgpr15 + ; GFX90A-NEXT: ; implicit-def: $vgpr16 + ; GFX90A-NEXT: ; implicit-def: $vgpr17 + ; GFX90A-NEXT: ; implicit-def: $vgpr18 + ; GFX90A-NEXT: ; implicit-def: $vgpr19 + ; GFX90A-NEXT: ; implicit-def: $vgpr20 + ; GFX90A-NEXT: ; implicit-def: $vgpr21 + ; GFX90A-NEXT: ; implicit-def: $vgpr22 + ; GFX90A-NEXT: ; implicit-def: $vgpr23 + ; GFX90A-NEXT: ; implicit-def: $vgpr24 + ; GFX90A-NEXT: ; implicit-def: $vgpr25 + ; GFX90A-NEXT: ; implicit-def: $vgpr26 + ; GFX90A-NEXT: ; implicit-def: $vgpr27 + ; GFX90A-NEXT: ; implicit-def: $vgpr28 + ; GFX90A-NEXT: ; implicit-def: $vgpr29 + ; GFX90A-NEXT: ; implicit-def: $vgpr30 + ; GFX90A-NEXT: ; implicit-def: $vgpr31 + ; GFX90A-NEXT: ; implicit-def: $vgpr32 + ; GFX90A-NEXT: ; implicit-def: $vgpr33 + ; GFX90A-NEXT: ; implicit-def: $vgpr34 + ; GFX90A-NEXT: ; implicit-def: $vgpr35 + ; GFX90A-NEXT: ; implicit-def: $vgpr36 + ; GFX90A-NEXT: ; implicit-def: $vgpr37 + ; GFX90A-NEXT: ; implicit-def: $vgpr38 + ; GFX90A-NEXT: ; implicit-def: $vgpr39 + ; GFX90A-NEXT: ; implicit-def: $vgpr40 + ; GFX90A-NEXT: ; implicit-def: $vgpr41 + ; GFX90A-NEXT: ; implicit-def: $vgpr42 + ; GFX90A-NEXT: ; implicit-def: $vgpr43 + ; GFX90A-NEXT: ; implicit-def: $vgpr44 + ; GFX90A-NEXT: ; implicit-def: $vgpr45 + ; GFX90A-NEXT: ; implicit-def: $vgpr46 + ; GFX90A-NEXT: ; implicit-def: $vgpr47 + ; GFX90A-NEXT: ; implicit-def: $vgpr48 + ; GFX90A-NEXT: ; implicit-def: $vgpr49 + ; GFX90A-NEXT: ; implicit-def: $vgpr50 + ; GFX90A-NEXT: ; implicit-def: $vgpr51 + ; GFX90A-NEXT: ; implicit-def: $vgpr52 + ; GFX90A-NEXT: ; implicit-def: $vgpr53 + ; GFX90A-NEXT: ; implicit-def: $vgpr54 + ; GFX90A-NEXT: ; implicit-def: $vgpr55 + ; GFX90A-NEXT: ; implicit-def: $vgpr56 + ; GFX90A-NEXT: ; implicit-def: $vgpr57 + ; GFX90A-NEXT: ; implicit-def: $vgpr58 + ; GFX90A-NEXT: ; implicit-def: $vgpr59 + ; GFX90A-NEXT: ; implicit-def: $vgpr60 + ; GFX90A-NEXT: ; implicit-def: $vgpr61 + ; GFX90A-NEXT: v_accvgpr_read_b32 v76, a97 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v75, a98 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v74, a99 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v73, a100 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v72, a101 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v71, a102 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v70, a103 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v69, a104 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v68, a105 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v67, a106 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v66, a107 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v65, a108 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v64, a109 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v63, a110 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v62, a111 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v93, a96 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v92, a97 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v91, a98 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v90, a99 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v89, a100 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v88, a101 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v87, a102 ; Reload Reuse 
+ ; GFX90A-NEXT: v_accvgpr_read_b32 v86, a103 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v85, a104 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v84, a105 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v83, a106 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v82, a107 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v81, a108 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v80, a109 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v79, a110 ; Reload Reuse + ; GFX90A-NEXT: v_accvgpr_read_b32 v78, a111 ; Reload Reuse + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="4,4" } + +... +--- +name: test_spill +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 } + +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111 = IMPLICIT_DEF + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = IMPLICIT_DEF + $vgpr8 = IMPLICIT_DEF + $vgpr9 = IMPLICIT_DEF + $vgpr10 = IMPLICIT_DEF + $vgpr11 = IMPLICIT_DEF + $vgpr12 = IMPLICIT_DEF + $vgpr13 = IMPLICIT_DEF + $vgpr14 = IMPLICIT_DEF + $vgpr15 = IMPLICIT_DEF + $vgpr16 = IMPLICIT_DEF + $vgpr17 = IMPLICIT_DEF + $vgpr18 = IMPLICIT_DEF + $vgpr19 = IMPLICIT_DEF + $vgpr20 = IMPLICIT_DEF + $vgpr21 = IMPLICIT_DEF + $vgpr22 = IMPLICIT_DEF + $vgpr23 = IMPLICIT_DEF + $vgpr24 = IMPLICIT_DEF + $vgpr25 = IMPLICIT_DEF + $vgpr26 = IMPLICIT_DEF + $vgpr27 = IMPLICIT_DEF + $vgpr28 = IMPLICIT_DEF + $vgpr29 = IMPLICIT_DEF + $vgpr30 = IMPLICIT_DEF + $vgpr31 = IMPLICIT_DEF + $vgpr32 = IMPLICIT_DEF + $vgpr33 = IMPLICIT_DEF + $vgpr34 = IMPLICIT_DEF + $vgpr35 = IMPLICIT_DEF + $vgpr36 = IMPLICIT_DEF + $vgpr37 = IMPLICIT_DEF + $vgpr38 = IMPLICIT_DEF + $vgpr39 = IMPLICIT_DEF + $vgpr40 = IMPLICIT_DEF + $vgpr41 = IMPLICIT_DEF + $vgpr42 = IMPLICIT_DEF + $vgpr43 = IMPLICIT_DEF + $vgpr44 = IMPLICIT_DEF + $vgpr45 = IMPLICIT_DEF + $vgpr46 = IMPLICIT_DEF + $vgpr47 = IMPLICIT_DEF + $vgpr48 = IMPLICIT_DEF + $vgpr49 = IMPLICIT_DEF + $vgpr50 = IMPLICIT_DEF + $vgpr51 = IMPLICIT_DEF + $vgpr52 = IMPLICIT_DEF + $vgpr53 = IMPLICIT_DEF + $vgpr54 = IMPLICIT_DEF + $vgpr55 = IMPLICIT_DEF + $vgpr56 = IMPLICIT_DEF + $vgpr57 = IMPLICIT_DEF + $vgpr58 = IMPLICIT_DEF + $vgpr59 = IMPLICIT_DEF + $vgpr60 = IMPLICIT_DEF + $vgpr61 = IMPLICIT_DEF + + $agpr64_agpr65_agpr66_agpr67_agpr68_agpr69_agpr70_agpr71_agpr72_agpr73_agpr74_agpr75_agpr76_agpr77_agpr78_agpr79 = COPY $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, implicit $exec + SI_SPILL_AV512_SAVE killed $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV512_SAVE $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) +... 
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index fb9295aa05961..cd8fe2a7a6e7e 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -27,11 +27,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-LABEL: call_split_type_used_outside_block_v2f32: ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 @@ -41,11 +42,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb0: @@ -61,11 +63,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-LABEL: call_split_type_used_outside_block_v3f32: ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 @@ -75,11 +78,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb0: @@ -95,11 +99,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-LABEL: call_split_type_used_outside_block_v4f16: ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 @@ -109,11 +114,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb0: @@ -129,11 +135,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-LABEL: call_split_type_used_outside_block_struct: ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 @@ -144,11 +151,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb0: @@ -168,30 +176,28 @@ bb1: define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 { ; GCN-LABEL: v3i16_registers: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GCN-NEXT: s_load_dword s12, s[8:9], 0x0 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GCN-NEXT: s_add_u32 s0, s0, s17 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GCN-NEXT: s_mov_b64 s[10:11], s[8:9] +; GCN-NEXT: s_load_dword s8, s[6:7], 0x0 +; GCN-NEXT: s_add_u32 s0, s0, s15 ; GCN-NEXT: s_addc_u32 s1, s1, 0 ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_bitcmp1_b32 s12, 0 -; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0 -; GCN-NEXT: s_and_b64 vcc, 
exec, s[12:13] +; GCN-NEXT: s_bitcmp1_b32 s8, 0 +; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-NEXT: s_and_b64 vcc, exec, s[8:9] ; GCN-NEXT: s_cbranch_vccnz .LBB4_2 ; GCN-NEXT: ; %bb.1: ; %if.else -; GCN-NEXT: s_add_u32 s8, s8, 8 +; GCN-NEXT: s_add_u32 s8, s6, 8 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_addc_u32 s9, s9, 0 +; GCN-NEXT: s_addc_u32 s9, s7, 0 ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_getpc_b64 s[18:19] -; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN-NEXT: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, s6, func_v3i16@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s7, s7, func_v3i16@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_branch .LBB4_3 ; GCN-NEXT: .LBB4_2: ; GCN-NEXT: s_mov_b32 s4, 0 @@ -221,30 +227,28 @@ if.end: ; preds = %if.else, %if.then define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 { ; GCN-LABEL: v3f16_registers: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GCN-NEXT: s_load_dword s12, s[8:9], 0x0 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GCN-NEXT: s_add_u32 s0, s0, s17 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GCN-NEXT: s_mov_b64 s[10:11], s[8:9] +; GCN-NEXT: s_load_dword s8, s[6:7], 0x0 +; GCN-NEXT: s_add_u32 s0, s0, s15 ; GCN-NEXT: s_addc_u32 s1, s1, 0 ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_bitcmp1_b32 s12, 0 -; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0 -; GCN-NEXT: s_and_b64 vcc, exec, s[12:13] +; GCN-NEXT: s_bitcmp1_b32 s8, 0 +; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-NEXT: s_and_b64 vcc, exec, s[8:9] ; GCN-NEXT: s_cbranch_vccnz .LBB5_2 ; GCN-NEXT: ; %bb.1: ; %if.else -; GCN-NEXT: s_add_u32 s8, s8, 8 +; GCN-NEXT: s_add_u32 s8, s6, 8 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_addc_u32 s9, s9, 0 +; GCN-NEXT: s_addc_u32 s9, s7, 0 ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_getpc_b64 s[18:19] -; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN-NEXT: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, s6, func_v3f16@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s7, s7, func_v3f16@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_branch .LBB5_3 ; GCN-NEXT: .LBB5_2: ; GCN-NEXT: s_mov_b32 s4, 0 diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index 16e9b0b9f32c5..5c5714a7541d5 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -19,8 +19,8 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit 
$exec :: (store (s32) into %stack.4, addrspace 5) + ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 6a9cff4181434..3694beceb0fee 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -992,7 +992,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a ; ; GFX9-LABEL: load_i8_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] @@ -1073,7 +1073,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias ; ; GFX9-LABEL: load_v2i8_to_v2f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1162,7 +1162,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias ; ; GFX9-LABEL: load_v3i8_to_v3f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1254,7 +1254,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; ; GFX9-LABEL: load_v4i8_to_v4f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1378,7 +1378,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; ; GFX9-LABEL: load_v4i8_to_v4f32_unaligned: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1543,14 +1543,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; ; GFX9-LABEL: load_v4i8_to_v4f32_2_uses: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: v_mov_b32_e32 v6, 9 +; GFX9-NEXT: s_movk_i32 s4, 0x900 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v4, v0, s[0:1] -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9-NEXT: s_movk_i32 s4, 0x900 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v4 ; GFX9-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 @@ -1740,7 +1740,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; ; GFX9-LABEL: load_v7i8_to_v7f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; 
GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX9-NEXT: v_mov_b32_e32 v10, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1880,7 +1880,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias ; ; GFX9-LABEL: load_v8i8_to_v8f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1980,7 +1980,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias ; ; GFX9-LABEL: i8_zext_inreg_i32_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2064,7 +2064,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias ; ; GFX9-LABEL: i8_zext_inreg_hi1_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2144,7 +2144,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, ; ; GFX9-LABEL: i8_zext_i32_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] @@ -2257,7 +2257,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; ; GFX9-LABEL: v4i8_zext_v4i32_to_v4f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2356,7 +2356,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out ; ; GFX9-LABEL: extract_byte0_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2436,7 +2436,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; ; GFX9-LABEL: extract_byte1_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2517,7 +2517,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; ; GFX9-LABEL: extract_byte2_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2598,7 +2598,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out ; ; GFX9-LABEL: extract_byte3_to_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2686,7 +2686,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float a ; ; GFX9-LABEL: cvt_ubyte0_or_multiuse: ; 
GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll new file mode 100644 index 0000000000000..53acbb6a7bceb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -enable-var-scope %s +; +; This code is used to trigger the following dag node, with different return type and vector element type: i16 extract_vec_elt v, 0 + +define amdgpu_kernel void @eggs(i1 %arg, ptr addrspace(1) %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6, ptr %arg7, ptr %arg8, ptr %arg9) { +; CHECK-LABEL: eggs: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x0 +; CHECK-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x8 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp0_b32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb10 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[8:9] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_lshrrev_b32_e32 v7, 8, v0 +; CHECK-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; CHECK-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; CHECK-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; CHECK-NEXT: v_lshrrev_b32_e32 v2, 24, v1 +; CHECK-NEXT: s_branch .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_mov_b32_e32 v7, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: .LBB0_3: ; %bb41 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x48 +; CHECK-NEXT: v_mov_b32_e32 v8, s10 +; CHECK-NEXT: v_mov_b32_e32 v9, s11 +; CHECK-NEXT: v_mov_b32_e32 v10, s12 +; CHECK-NEXT: v_mov_b32_e32 v11, s13 +; CHECK-NEXT: v_mov_b32_e32 v12, s14 +; CHECK-NEXT: v_mov_b32_e32 v13, s15 +; CHECK-NEXT: v_mov_b32_e32 v14, s16 +; CHECK-NEXT: v_mov_b32_e32 v15, s17 +; CHECK-NEXT: v_mov_b32_e32 v16, s18 +; CHECK-NEXT: v_mov_b32_e32 v17, s19 +; CHECK-NEXT: v_mov_b32_e32 v18, s20 +; CHECK-NEXT: v_mov_b32_e32 v19, s21 +; CHECK-NEXT: v_mov_b32_e32 v20, s22 +; CHECK-NEXT: v_mov_b32_e32 v21, s23 +; CHECK-NEXT: flat_store_byte v[8:9], v0 +; CHECK-NEXT: flat_store_byte v[10:11], v7 +; CHECK-NEXT: flat_store_byte v[12:13], v6 +; CHECK-NEXT: flat_store_byte v[14:15], v5 +; CHECK-NEXT: flat_store_byte v[16:17], v1 +; CHECK-NEXT: flat_store_byte v[18:19], v4 +; CHECK-NEXT: flat_store_byte v[20:21], v3 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; CHECK-NEXT: flat_store_byte v[0:1], v2 +; CHECK-NEXT: s_endpgm +bb: + br i1 %arg, label %bb10, label %bb41 + +bb10: ; preds = %bb + %tmp12 = load <1 x i8>, ptr addrspace(1) %arg1 + %tmp13 = getelementptr i8, ptr addrspace(1) %arg1, i64 1 + %tmp16 = load <1 x i8>, ptr addrspace(1) %tmp13 + %tmp17 = getelementptr i8, ptr addrspace(1) %arg1, i64 2 + %tmp20 = load <1 x i8>, ptr addrspace(1) %tmp17 + %tmp21 = getelementptr i8, ptr addrspace(1) %arg1, i64 3 + %tmp24 = load <1 x i8>, ptr addrspace(1) %tmp21 + %tmp25 = 
getelementptr i8, ptr addrspace(1) %arg1, i64 4 + %tmp28 = load <1 x i8>, ptr addrspace(1) %tmp25 + %tmp29 = getelementptr i8, ptr addrspace(1) %arg1, i64 5 + %tmp32 = load <1 x i8>, ptr addrspace(1) %tmp29 + %tmp33 = getelementptr i8, ptr addrspace(1) %arg1, i64 6 + %tmp36 = load <1 x i8>, ptr addrspace(1) %tmp33 + %tmp37 = getelementptr i8, ptr addrspace(1) %arg1, i64 7 + %tmp40 = load <1 x i8>, ptr addrspace(1) %tmp37 + br label %bb41 + +bb41: ; preds = %bb10, %bb + %tmp42 = phi <1 x i8> [ %tmp40, %bb10 ], [ zeroinitializer, %bb ] + %tmp43 = phi <1 x i8> [ %tmp36, %bb10 ], [ zeroinitializer, %bb ] + %tmp44 = phi <1 x i8> [ %tmp32, %bb10 ], [ zeroinitializer, %bb ] + %tmp45 = phi <1 x i8> [ %tmp28, %bb10 ], [ zeroinitializer, %bb ] + %tmp46 = phi <1 x i8> [ %tmp24, %bb10 ], [ zeroinitializer, %bb ] + %tmp47 = phi <1 x i8> [ %tmp20, %bb10 ], [ zeroinitializer, %bb ] + %tmp48 = phi <1 x i8> [ %tmp16, %bb10 ], [ zeroinitializer, %bb ] + %tmp49 = phi <1 x i8> [ %tmp12, %bb10 ], [ zeroinitializer, %bb ] + store <1 x i8> %tmp49, ptr %arg2 + store <1 x i8> %tmp48, ptr %arg3 + store <1 x i8> %tmp47, ptr %arg4 + store <1 x i8> %tmp46, ptr %arg5 + store <1 x i8> %tmp45, ptr %arg6 + store <1 x i8> %tmp44, ptr %arg7 + store <1 x i8> %tmp43, ptr %arg8 + store <1 x i8> %tmp42, ptr %arg9 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll index cf80833981436..dbb4b3f887a48 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll @@ -7,12 +7,12 @@ define void @main(float %arg) { ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_MOV_B32_1]], implicit-def dead $scc - ; CHECK-NEXT: $vcc_lo = COPY [[S_AND_B32_]] + ; CHECK-NEXT: $vcc_lo = PRED_COPY [[S_AND_B32_]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} @@ -20,24 +20,24 @@ define void @main(float %arg) { ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; CHECK-NEXT: %20:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %1:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, %20, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: %2:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, %1, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, 
implicit $exec ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb11: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %1, %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %2, %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_FMAC_F32_e64_1]], %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_ADD_F32_e64_]], %bb.1 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, [[S_MOV_B32_2]], %bb.1 ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI2]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CNDMASK_B32_e64_]] - ; CHECK-NEXT: S_CMP_LG_U32 killed [[COPY1]], killed [[S_MOV_B32_3]], implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, killed [[COPY2]], implicit-def dead $scc - ; CHECK-NEXT: $vcc_lo = COPY [[S_AND_B32_1]] + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: S_CMP_LG_U32 killed [[PRED_COPY1]], killed [[S_MOV_B32_3]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, killed [[PRED_COPY2]], implicit-def dead $scc + ; CHECK-NEXT: $vcc_lo = PRED_COPY [[S_AND_B32_1]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll index 3c22802d90118..62b4771b266af 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -7,8 +7,8 @@ define i32 @divergent_lshr_and_cmp(i32 %x) { ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 2, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 killed [[V_AND_B32_e64_]], 0, implicit $exec ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -17,13 +17,13 @@ define i32 @divergent_lshr_and_cmp(i32 %x) { ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 - ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[PRED_COPY]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2.UnifiedReturnBlock: - ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[V_LSHLREV_B32_e64_]], %bb.1 + ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, [[V_LSHLREV_B32_e64_]], %bb.1 ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY 
[[PHI]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[PHI]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = and i32 %x, 2 @@ -45,16 +45,16 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(i1 addrspace(1)* %out, i32 % ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset.cast, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset.cast, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def dead $scc ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 2, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_1]], 0, implicit-def $scc - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]] + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64_xexec = PRED_COPY [[PRED_COPY2]] ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc @@ -62,23 +62,23 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(i1 addrspace(1)* %out, i32 % ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1.out.true: ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY3]], killed [[S_MOV_B64_]], implicit-def dead $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[PRED_COPY3]], killed [[S_MOV_B64_]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[S_MOV_B32_3]], %subreg.sub2, killed [[S_MOV_B32_2]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY5]], %subreg.sub0, killed [[PRED_COPY4]], %subreg.sub1, killed [[S_MOV_B32_3]], %subreg.sub2, killed [[S_MOV_B32_2]], %subreg.sub3 ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed 
[[S_XOR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2.out.else: - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub1 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY1]].sub0 ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY7]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[S_MOV_B32_5]], %subreg.sub2, killed [[S_MOV_B32_4]], %subreg.sub3 - ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY3]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY7]], %subreg.sub0, killed [[PRED_COPY6]], %subreg.sub1, killed [[S_MOV_B32_5]], %subreg.sub2, killed [[S_MOV_B32_4]], %subreg.sub3 + ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PRED_COPY3]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_1]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 entry: diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll index ccf9eec087b25..d10d0dd74741d 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll @@ -40,16 +40,12 @@ define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) { } ; GCN-LABEL: {{^}}select_and_v4: -; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0 -; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0 -; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0 -; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0 -; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]] -; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]] -; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]] -; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]] +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, ; GCN-NOT: v_and_b32 -; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]] +; GCN: store_dword define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) { %c = icmp slt i32 %x, 11 %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> @@ -98,16 +94,12 @@ define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) { } ; GCN-LABEL: {{^}}select_or_v4: -; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, -1 -; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, -1 -; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, -1 -; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, -1 +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, ; GCN-NOT: v_or_b32 -; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]] -; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]] -; GCN: 
v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]] -; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]] -; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]] +; GCN: store_dword define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) { %c = icmp slt i32 %x, 11 %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> @@ -155,15 +147,10 @@ define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i } ; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32: -; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], 7, 14 -; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], 6, 10 -; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], 5, 6 -; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], 9, 2 -; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]] -; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]] -; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]] -; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]] -; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]] +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9, +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5, +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6, +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7, define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, <4 x i32> , <4 x i32> %bo = sub <4 x i32> , %sel @@ -274,16 +261,14 @@ define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspac } ; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32: -; GCN: s_mov_b32 [[T0:s[0-9]+]], 0x41500000 -; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], [[T0]], 0x40c00000 -; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], 0x41100000, 4.0 -; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], 0x40a00000, 2.0 -; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], 1.0, 0 -; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]] -; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]] -; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]] -; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]] -; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]] +; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000 +; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000 +; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000 +; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000 +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, +; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]], +; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]], +; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]], define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) { %sel = select i1 %cond, <4 x float> , <4 x float> %bo = fsub <4 x float> , %sel diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll index 85e7435bccc9f..2b94c3581979a 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-frame.ll +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -59,10 +59,12 @@ entry: ; CHECK: .cfi_startproc ; SGPR33 = 65 -; CHECK: v_mov_b32_e32 [[TMP_VGPR:v[0-9]+]], s33 -; GFX900: buffer_store_dword [[TMP_VGPR]], off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GFX90A-V2A-DIS: buffer_store_dword [[TMP_VGPR]], off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GFX90A-V2A-EN: buffer_store_dword [[TMP_VGPR]], off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; CHECK: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; CHECK: s_mov_b32 s33, s32 +; CHECK: v_mov_b32_e32 [[TMP_VGPR:v[0-9]+]], [[FP_SCRATCH_COPY]] +; GFX900: buffer_store_dword [[TMP_VGPR]], off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; GFX90A-V2A-DIS: buffer_store_dword [[TMP_VGPR]], 
off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; GFX90A-V2A-EN: buffer_store_dword [[TMP_VGPR]], off, s[0:3], s33 offset:320 ; 4-byte Folded Spill ; GFX900: .cfi_offset 65, 28672 ; GFX90A-V2A-DIS: .cfi_offset 65, 28672 @@ -538,22 +540,21 @@ declare hidden void @ex() #0 ; CHECK-NEXT: .cfi_undefined 60 ; CHECK-NEXT: .cfi_undefined 61 -; CHECK-NOT: .cfi_{{.*}} - +; CHECK: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; CHECK: s_mov_b32 s33, s32 ; WAVE64: s_or_saveexec_b64 [[EXEC_MASK:s\[[0-9]+:[0-9]+\]]], -1 ; WAVE32: s_or_saveexec_b32 [[EXEC_MASK:s[0-9]+]], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VGPR40_wave64 = 2600 ; WAVE64-NEXT: .cfi_offset 2600, 0 ; VGPR40_wave32 = 1576 ; WAVE32-NEXT: .cfi_offset 1576, 0 -; CHECK-NOT: .cfi_{{.*}} ; WAVE64: s_mov_b64 exec, [[EXEC_MASK]] ; WAVE32: s_mov_b32 exec_lo, [[EXEC_MASK]] ; CHECK-NOT: .cfi_{{.*}} -; CHECK: v_writelane_b32 v40, s33, 2 +; CHECK: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2 ; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41] ; BLOCK_LENGTH ULEB128(5)=[0x05] @@ -573,16 +574,16 @@ declare hidden void @ex() #0 ; CHECK-NOT: .cfi_{{.*}} -; CHECK: s_mov_b32 s33, s32 ; SGPR33 = 65 ; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NOT: .cfi_{{.*}} ; CHECK: s_addk_i32 s32, -; CHECK: v_readlane_b32 s33, v40, 2 +; CHECK: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2 ; SGPR32 = 64 ; CHECK: .cfi_def_cfa_register 64 +; CHECK-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; CHECK-NOT: .cfi_{{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll new file mode 100644 index 0000000000000..92fee6060683a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll @@ -0,0 +1,164 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 
< %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck 
--check-prefixes=V3-GFX1010-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=V3-GFX1100 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 < %s | FileCheck --check-prefixes=V3-GFX1101 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 < %s | FileCheck --check-prefixes=V3-GFX1102 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=V3-GFX1103 %s + +; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" +; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" +; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" +; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" +; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" +; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" +; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" +; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" +; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" +; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" +; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack" +; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" +; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" +; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" +; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" +; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" +; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" +; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" +; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" +; 
V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" +; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" +; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" +; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" +; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack" +; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" +; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" +; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc" +; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack" +; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc" +; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" +; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack" +; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" +; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack" +; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc" +; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" +; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" +; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" +; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" +; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" +; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" +; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" +; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" +; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack" +; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" +; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" +; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" +; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" +; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034" +; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035" +; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036" +; V3-GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" +; V3-GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101" +; V3-GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102" +; V3-GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103" + + + +define amdgpu_kernel void @directive_amdgcn_target() { + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index b196a194ecf38..65a7c5e831dd8 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -1,99 +1,3 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa 
--amdhsa-code-object-version=3 -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=polaris11 < %s | FileCheck 
--check-prefixes=V3-GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | 
FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 
-mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s - ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX600 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=GFX600 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=GFX601 %s @@ -190,58 +94,6 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=GFX1035 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=GFX1036 %s -; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" -; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" -; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" -; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" -; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" -; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" -; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" -; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" -; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" -; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack" -; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -; V3-GFX803: 
.amdgcn_target "amdgcn-amd-amdhsa--gfx803" -; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" -; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" -; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack" -; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" -; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" -; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack" -; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack" -; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" -; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc" -; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack" -; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc" -; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" -; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc" -; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack" -; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc" -; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" -; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack" -; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" -; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack" -; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc" -; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc" -; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" -; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" -; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" -; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack" -; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" -; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack" -; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" -; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack" -; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" -; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" -; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" -; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" -; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034" -; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035" -; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036" - ; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" ; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" ; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll index 80dc4e16c0b67..12f8d7de18345 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll @@ -10,7 +10,7 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) ; GCN-LABEL: name: s_ctpop_i64 ; GCN: %[[BCNT:[0-9]+]]:sreg_32 = S_BCNT1_I32_B64 -; GCN: %[[SREG1:[0-9]+]]:sreg_32 = COPY %[[BCNT]] +; GCN: %[[SREG1:[0-9]+]]:sreg_32 = PRED_COPY %[[BCNT]] ; GCN: %[[SREG2:[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: REG_SEQUENCE killed %[[SREG1]], %subreg.sub0, killed %[[SREG2]], %subreg.sub1 define amdgpu_kernel void 
@s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll index cd40dc678dcd0..2ba7059a3c9e3 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -6,22 +6,22 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down.cast, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down.cast, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]] ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[PRED_COPY4]], killed [[PRED_COPY3]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -36,16 +36,16 
@@ define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY1]], 0, 16, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[PRED_COPY1]], 0, 16, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_BFE_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i16 %x, 0 %select = select i1 %setcc, i1 true, i1 %z @@ -57,23 +57,23 @@ define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s64) from %ir.1, align 4, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 11, 0 :: (dereferenceable invariant load (s64) from %ir.1, align 4, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub0 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[PRED_COPY4]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_LT_I32 killed [[COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY6]], killed [[COPY5]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_LT_I32 killed [[PRED_COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64 = PRED_COPY $scc + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[PRED_COPY6]], killed [[PRED_COPY5]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -88,15 +88,15 @@ define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[PRED_COPY1]], killed [[S_MOV_B32_]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i32 %x, 0 %select = select i1 %setcc, i1 true, i1 %z @@ -108,27 +108,27 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.1, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = 
S_LOAD_DWORD_IMM [[COPY]](p4), 13, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down.cast, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.1, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[PRED_COPY]](p4), 13, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down.cast, addrspace 4) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub1 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub1 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[REG_SEQUENCE]].sub0 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[COPY3]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY4]], %subreg.sub0, killed [[PRED_COPY3]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub3 + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY5]], %subreg.sub1 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LOAD_DWORD_IMM]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:sreg_64 = PRED_COPY $scc ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY8:%[0-9]+]]:vreg_64 = PRED_COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[PRED_COPY8]], implicit $exec + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed 
[[PRED_COPY7]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.3, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -143,18 +143,18 @@ define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { ; GCN: bb.0 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[PRED_COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[PRED_COPY3]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec - ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_CNDMASK_B32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 %setcc = icmp slt i64 %x, 0 %select = select i1 %setcc, i1 true, i1 %z diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 4a7a328d33c61..ed9413b3f6c03 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1337,33 +1337,31 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; CI-NEXT: s_getpc_b64 s[40:41] ; CI-NEXT: s_mov_b32 s40, s0 ; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0 -; CI-NEXT: s_mov_b32 s14, s10 -; CI-NEXT: s_mov_b32 s12, s8 -; CI-NEXT: s_mov_b32 s13, s9 +; CI-NEXT: s_mov_b64 s[10:11], s[4:5] +; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x0 +; CI-NEXT: s_load_dword s4, s[2:3], 0x2 +; CI-NEXT: s_mov_b32 s14, s8 ; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_add_u32 s40, s40, s11 -; CI-NEXT: s_mov_b64 s[10:11], s[6:7] -; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 -; CI-NEXT: s_load_dword s6, s[4:5], 0x2 +; CI-NEXT: s_add_u32 s40, s40, s9 ; CI-NEXT: s_addc_u32 s41, s41, 0 -; CI-NEXT: s_add_u32 s8, s4, 12 -; CI-NEXT: s_addc_u32 s9, s5, 0 -; CI-NEXT: s_getpc_b64 s[4:5] -; CI-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 -; CI-NEXT: s_addc_u32 s5, s5, 
void_func_void@gotpcrel32@hi+12 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_add_i32_e32 v40, vcc, s6, v3 +; CI-NEXT: s_add_u32 s8, s2, 12 +; CI-NEXT: s_addc_u32 s9, s3, 0 +; CI-NEXT: s_getpc_b64 s[2:3] +; CI-NEXT: s_add_u32 s2, s2, void_func_void@gotpcrel32@lo+4 +; CI-NEXT: s_addc_u32 s3, s3, void_func_void@gotpcrel32@hi+12 +; CI-NEXT: v_add_i32_e32 v40, vcc, s4, v3 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 +; CI-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0 ; CI-NEXT: ds_read_b32 v41, v40 ; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; CI-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: s_mov_b64 s[4:5], s[0:1] -; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_mov_b64 s[0:1], s[40:41] ; CI-NEXT: v_or_b32_e32 v31, v0, v2 +; CI-NEXT: s_mov_b32 s12, s6 +; CI-NEXT: s_mov_b32 s13, s7 ; CI-NEXT: s_mov_b64 s[2:3], s[42:43] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_mov_b32 s39, 0xf000 @@ -1381,30 +1379,28 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; GFX9-NEXT: s_getpc_b64 s[36:37] ; GFX9-NEXT: s_mov_b32 s36, s0 ; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 +; GFX9-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-NEXT: s_load_dword s4, s[2:3], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s14, s8 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_add_u32 s36, s36, s9 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 -; GFX9-NEXT: s_add_u32 s8, s4, 12 -; GFX9-NEXT: s_addc_u32 s9, s5, 0 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s6 -; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 +; GFX9-NEXT: s_add_u32 s8, s2, 12 +; GFX9-NEXT: s_addc_u32 s9, s3, 0 +; GFX9-NEXT: s_getpc_b64 s[2:3] +; GFX9-NEXT: s_add_u32 s2, s2, void_func_void@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s3, s3, void_func_void@gotpcrel32@hi+12 +; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s4 +; GFX9-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x0 ; GFX9-NEXT: ds_read_b32 v42, v41 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 +; GFX9-NEXT: s_mov_b32 s12, s6 +; GFX9-NEXT: s_mov_b32 s13, s7 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index 314b59b7c8768..1daa5af4902ca 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -448,13 +448,14 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: .cfi_undefined 60 ; CHECK-NEXT: .cfi_undefined 61 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: 
s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: .cfi_offset 2600, 256 -; CHECK-NEXT: s_mov_b64 exec, s[16:17] -; CHECK-NEXT: v_writelane_b32 v40, s33, 16 +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v40, s16, 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa8, 0x14, 0xe4, 0x40 ; -; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -501,18 +502,18 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] ; CHECK-NEXT: v_mov_b32_e32 v41, v31 ; CHECK-NEXT: s_mov_b32 s42, s15 +; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7] ; CHECK-NEXT: s_mov_b32 s43, s14 ; CHECK-NEXT: s_mov_b32 s44, s13 ; CHECK-NEXT: s_mov_b32 s45, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11] +; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47] ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] +; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35] +; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39] +; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s45 ; CHECK-NEXT: s_mov_b32 s13, s44 ; CHECK-NEXT: s_mov_b32 s14, s43 @@ -541,12 +542,13 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: v_readlane_b32 s36, v40, 2 ; CHECK-NEXT: v_readlane_b32 s35, v40, 1 ; CHECK-NEXT: v_readlane_b32 s34, v40, 0 +; CHECK-NEXT: v_readlane_b32 s4, v40, 16 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_addk_i32 s32, 0xfc00 -; CHECK-NEXT: v_readlane_b32 s33, v40, 16 ; CHECK-NEXT: .cfi_def_cfa_register 64 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll index 1cbde24c96a8b..544d10f059741 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll @@ -13,7 +13,7 @@ ; NONE: OS/ABI: SystemV (0x0) ; HSA: OS/ABI: AMDGPU_HSA (0x40) -; HSA: ABIVersion: 2 +; HSA: ABIVersion: 3 ; PAL: OS/ABI: AMDGPU_PAL (0x41) ; PAL: ABIVersion: 0 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) diff --git a/llvm/test/CodeGen/AMDGPU/elf-notes.ll b/llvm/test/CodeGen/AMDGPU/elf-notes.ll index 0507e868fea53..7c78a92486494 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-notes.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-notes.ll @@ -1,12 +1,12 @@ -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-UNK %s -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-UNK %s -; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-UNK-ELF %s 
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-HSA-ELF %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=iceland --amdhsa-code-object-version=2 < %s | FileCheck --check-prefix=OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 -filetype=obj --amdhsa-code-object-version=2 < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-PAL-ELF %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 < %s | FileCheck --check-prefix=OSABI-UNK %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-UNK %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-UNK-ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s| FileCheck --check-prefix=OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-HSA-ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 < %s | FileCheck --check-prefix=OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=iceland < %s | FileCheck --check-prefix=OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx802 -filetype=obj < %s | llvm-readelf --notes - | FileCheck --check-prefix=OSABI-PAL-ELF %s ; RUN: llc -march=r600 < %s | FileCheck --check-prefix=R600 %s ; OSABI-UNK-NOT: .hsa_code_object_version @@ -95,3 +95,6 @@ define amdgpu_kernel void @elf_notes() { ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll new file mode 100644 index 0000000000000..22f90682aa973 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll @@ -0,0 +1,23 @@ +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s + +@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 + +; No stack objects, only the indirect call has to enable scratch +; GCN-LABEL: test_indirect_call: + +; COV5: .amdhsa_private_segment_fixed_size 0{{$}} +; COV4: .amdhsa_private_segment_fixed_size 16384{{$}} + +; GCN: .amdhsa_user_sgpr_private_segment_buffer 1 + +; COV5: .amdhsa_uses_dynamic_stack 1 +; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +define amdgpu_kernel void @test_indirect_call() { + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll b/llvm/test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll index f5309792e265a..97bd6cdad7fa5 100644 ---
a/llvm/test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-atomicrmw-syncscope.ll @@ -1,5 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; Check that syncscope is copied from atomicrmw to cmpxchg during expansion. +; There should be no scc unless we have system scope. + ; GCN-LABEL: {{^}}expand_atomicrmw_agent: ; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}} define void @expand_atomicrmw_agent(float addrspace(1)* nocapture %arg) { diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll index 776de7db20657..22478a74f2a7e 100644 --- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll @@ -14,13 +14,12 @@ define i32 @s_add_co_select_user() { ; GFX7-NEXT: v_add_i32_e64 v0, s[4:5], s6, s6 ; GFX7-NEXT: s_or_b32 s4, s4, s5 ; GFX7-NEXT: s_cmp_lg_u32 s4, 0 -; GFX7-NEXT: s_addc_u32 s7, s6, 0 -; GFX7-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX7-NEXT: s_cselect_b32 s4, s7, 0 -; GFX7-NEXT: s_cmp_gt_u32 s6, 31 +; GFX7-NEXT: s_addc_u32 s4, s6, 0 ; GFX7-NEXT: v_mov_b32_e32 v1, s4 ; GFX7-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX7-NEXT: s_cmp_gt_u32 s6, 31 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -32,13 +31,12 @@ define i32 @s_add_co_select_user() { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e64 v0, s[4:5], s6, s6 ; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX9-NEXT: s_addc_u32 s7, s6, 0 -; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GFX9-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GFX9-NEXT: s_cselect_b32 s4, s7, 0 -; GFX9-NEXT: s_cmp_gt_u32 s6, 31 +; GFX9-NEXT: s_addc_u32 s4, s6, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: s_cmp_gt_u32 s6, 31 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -53,11 +51,10 @@ define i32 @s_add_co_select_user() { ; GFX10-NEXT: s_cmpk_lg_u32 s5, 0x0 ; GFX10-NEXT: s_addc_u32 s5, s4, 0 ; GFX10-NEXT: s_cselect_b32 s6, -1, 0 -; GFX10-NEXT: s_and_b32 s6, s6, exec_lo -; GFX10-NEXT: s_cselect_b32 s5, s5, 0 ; GFX10-NEXT: s_cmp_gt_u32 s4, 31 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, s5, s6 ; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_add_co_select_user: @@ -68,15 +65,15 @@ define i32 @s_add_co_select_user() { ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_add_co_u32 v0, s1, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: s_cmpk_lg_u32 s1, 0x0 ; GFX11-NEXT: s_addc_u32 s1, s0, 0 ; GFX11-NEXT: s_cselect_b32 s2, -1, 0 -; GFX11-NEXT: s_and_b32 s2, s2, exec_lo -; GFX11-NEXT: s_cselect_b32 s1, s1, 0 ; GFX11-NEXT: s_cmp_gt_u32 s0, 31 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, s1, s2 ; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 -; GFX11-NEXT: v_cndmask_b32_e32 v0, s1, v0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e32 v0,
v1, v0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %i = load volatile i32, i32 addrspace(4)* null, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir index 44d8c81db1aa4..f80dba495c9ed 100644 --- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir +++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir @@ -35,8 +35,9 @@ body: | ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { - ; CHECK-NEXT: internal %6.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24:av_1024_align2 = PRED_COPY [[COPY]].sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24 + ; CHECK-NEXT: internal %6.sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16:av_1024_align2 = PRED_COPY [[COPY]].sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16 ; CHECK-NEXT: } ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir new file mode 100644 index 0000000000000..f802e1b1e18af --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir @@ -0,0 +1,279 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Tests to check the conservative liveness extension for the wwm registers during SGPR spill lowering. + +# Even though the VGPR can be shared for the wwm-operand (writelane/readlane get inserted for the SGPR spills) +# and the regular operand (%0), they get different registers as we conservatively extend the liveness of the +# wwm-operands.
+--- +name: test_single_block +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + bb.0: + liveins: $sgpr4, $vgpr2_vgpr3 + ; GCN-LABEL: name: test_single_block + ; GCN: liveins: $sgpr4, $vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0 + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: SI_RETURN + SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_NOP 0 + renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec + SI_RETURN +... + +# Due to the presence of wwm-operand in the divergent flow, the regular variable (%0) shouldn't get the same register +# allocated for the wwm-operand in writelane/readlane when the SGPR spill is lowered. + +--- +name: test_if_else +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + ; GCN-LABEL: name: test_if_else + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 0 + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec + ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0 + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: SI_RETURN + bb.0: + liveins: $sgpr6, $sgpr10_sgpr11 + S_BRANCH %bb.1 + bb.1: + liveins: $sgpr6, $sgpr10_sgpr11 + %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + S_CBRANCH_EXECZ %bb.3, implicit $exec + bb.2: + liveins: $sgpr6, $sgpr10_sgpr11 + SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_NOP 0 + renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, 
implicit $sgpr32 + %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + S_BRANCH %bb.3 + bb.3: + liveins: $sgpr10_sgpr11 + $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec + S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0 + SI_RETURN +... + +# The wwm-register usage outside the loop should have the interference marked with +# all the regular virtual registers used in the test. The divergent loop index value (%1) +# can actually share the same VGPR as the wwm-operand. But since we extend the liveness of +# the wwm operand, an interference will always exist between them. + +--- +name: test_loop +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + ; GCN-LABEL: name: test_loop + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0 + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0 + ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec + ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4 + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec + ; GCN-NEXT: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: + ; GCN-NEXT: liveins: $vgpr0, $sgpr6_sgpr7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: SI_RETURN + bb.0: + liveins: $sgpr4, $sgpr10_sgpr11 + %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec + bb.1: + liveins: $sgpr4, $sgpr10_sgpr11 + SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_NOP 0 + renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr32 + %0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + S_BRANCH %bb.2 + bb.2: + liveins: $sgpr4, $sgpr10_sgpr11 + S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0 + $sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec + S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4 + %1:vgpr_32 = V_MOV_B32_e32 5, implicit $exec + S_CBRANCH_EXECZ %bb.3, implicit $exec + S_BRANCH %bb.3 + bb.3: + $vcc = V_CMP_EQ_U32_e64 0, %1:vgpr_32, implicit $exec + $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec + S_CBRANCH_SCC1 %bb.5, implicit $scc + bb.4: + liveins: $sgpr6_sgpr7 + %2:vgpr_32 = V_SUB_U32_e32 1, %1:vgpr_32, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 %2:vgpr_32, implicit $exec + S_BRANCH %bb.3 + bb.5: + liveins: $sgpr6_sgpr7 + $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc + SI_RETURN +... + +# There must be one KILL instruction for the wwm-operand in every return block. +# Due to that, the wwm-register allocated should be different from the ones +# allocated for the regular virtual registers. + +--- +name: test_multiple_return_blocks +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + ; GCN-LABEL: name: test_multiple_return_blocks + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0 + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $vgpr0, $vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: SI_RETURN + bb.0: + liveins: $sgpr4, $vgpr2_vgpr3 + S_CBRANCH_EXECZ %bb.2, implicit $exec + bb.1: + liveins: $sgpr4, $vgpr2_vgpr3 + SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_NOP 0 + renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + %0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec + SI_RETURN + bb.2: + liveins: $vgpr2_vgpr3 + %1:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + GLOBAL_STORE_DWORD $vgpr2_vgpr3, %1:vgpr_32, 0, 0, implicit $exec + SI_RETURN +... 
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll index d5d24fad7a313..9ff7c3bd75385 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll @@ -534,7 +534,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16(<16 x i16> addrspace(1) * %p0, <16 x ; GFX9-NEXT: .LBB3_2: ; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB3_3: ; %T -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc @@ -706,7 +705,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(<16 x i16> addrspace(1) * %p0, <16 ; GFX9-NEXT: .LBB4_2: ; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB4_3: ; %T -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc @@ -878,7 +876,6 @@ define <4 x half> @vec_16xf16_extract_4xf16(<16 x half> addrspace(1) * %p0, <16 ; GFX9-NEXT: .LBB5_2: ; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: .LBB5_3: ; %T -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll index 46bbade8db76a..77234e4fe3f1e 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -10,18 +10,18 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: bb.0.main_body: ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[PRED_COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[PRED_COPY3]], %subreg.sub0, killed [[PRED_COPY2]], %subreg.sub1, killed [[PRED_COPY1]], %subreg.sub2 + ; 
GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_96 = PRED_COPY [[REG_SEQUENCE]] ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[DEF2]] ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[PRED_COPY4]], [[PRED_COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 7) ; GCN-NEXT: S_ENDPGM 0 main_body: %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index c5dd7a2e39b37..0196de16e1090 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -38,15 +38,15 @@ entry: ; GCN-LABEL: {{^}}double4_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 3 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double4_extelt(double addrspace(1)* %out, i32 %sel) { entry: @@ -57,17 +57,18 @@ entry: ; GCN-LABEL: {{^}}double5_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 3 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[[0-9]+}}, 4 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40140a3d, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 4 +; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C3]] +; GCN-DAG: 
v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C4]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) { entry: @@ -106,9 +107,10 @@ entry: ; GCN-LABEL: {{^}}double2_extelt: ; GCN-NOT: buffer_ -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, {{[^,]+}}, [[C1]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double2_extelt(double addrspace(1)* %out, i32 %sel) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll index ffa9b912eae3a..094ae27b5c574 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -14,12 +14,14 @@ define amdgpu_kernel void @extract_vector_elt_v3f64_2(double addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3f64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %out, <3 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x double> %foo, i32 %elt @@ -29,15 +31,18 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(double addrspace(1)* %ou ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4f64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, 
v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v4f64(double addrspace(1)* %out, <4 x double> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x double> %foo, i32 %elt diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll index b2f5697383f4d..248f5fc985eee 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -31,9 +31,10 @@ define amdgpu_kernel void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <2 x i64> %foo, i32 %elt @@ -59,12 +60,14 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <3 x i64> %foo, i32 %elt @@ -74,15 +77,18 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, ; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64: ; GCN-NOT: buffer_load -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3 -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 +; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 2 +; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0 +; GCN-DAG: s_cmp_eq_u32 [[IDX]], 3 +; 
GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0 +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] +; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]] ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 { %dynelt = extractelement <4 x i64> %foo, i32 %elt diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll index a5787714fb7b4..da852af3f2303 100644 --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -13,20 +13,21 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01 -; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]] +; FIXME: We should be using s_addk_i32 here, but the reg allocation hints +; are not always followed. +; SI-DAG: s_add_i32 [[SEXP0:s[0-9]+]], [[SEXP]], 0xfffffc01 +; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP0]] ; SI-DAG: s_andn2_b64 ; SI-DAG: cmp_gt_i32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI-DAG: cmp_lt_i32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 -; SI-DAG: v_cmp_gt_f64_e64 [[FCMP:s[[0-9]+:[0-9]+]]] -; SI-DAG: v_cmp_lg_f64_e32 vcc -; SI-DAG: s_and_b64 [[AND1:s[[0-9]+:[0-9]+]]], [[FCMP]], vcc -; SI-DAG: s_and_b64 [[AND1]], [[AND1]], exec -; SI-DAG: s_cselect_b32 s{{[0-9]+}}, 0x3ff00000, 0 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: v_cmp_gt_f64 +; SI-DAG: v_cmp_lg_f64 +; SI-DAG: v_cndmask_b32 +; SI: v_cndmask_b32 ; SI: v_add_f64 ; SI: s_endpgm define amdgpu_kernel void @fceil_f64(double addrspace(1)* %out, double %x) { diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll index cf610f9436acd..6baac6a333bd0 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll @@ -9,26 +9,26 @@ define float @fdiv_f32(float %a, float %b) #0 { ; GCN: bb.0.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, 
[[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_DIV_SCALE_F32_e64_2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_RCP_F32_e64_]], 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[V_DIV_SCALE_F32_e64_]], 0, [[V_FMA_F32_e64_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_MUL_F32_e64_]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_2]], 0, [[V_FMA_F32_e64_1]], 0, [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_FMA_F32_e64_3]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: $vcc = PRED_COPY [[V_DIV_SCALE_F32_e64_1]] + ; GCN-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed [[V_FMA_F32_e64_4]], 0, [[V_FMA_F32_e64_1]], 0, [[V_FMA_F32_e64_3]], 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed [[V_DIV_FMAS_F32_e64_]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_DIV_FIXUP_F32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = fdiv float %a, %b @@ -40,26 +40,26 @@ define float @fdiv_nnan_f32(float %a, float %b) #0 { ; GCN: bb.0.entry: ; 
GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY1]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_DIV_SCALE_F32_e64_2:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_3:%[0-9]+]]:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[PRED_COPY]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, [[V_DIV_SCALE_F32_e64_2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_RCP_F32_e64_]], 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, [[V_DIV_SCALE_F32_e64_]], 0, [[V_FMA_F32_e64_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_MUL_F32_e64_]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_3:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed [[V_FMA_F32_e64_2]], 0, [[V_FMA_F32_e64_1]], 0, [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_F32_e64_4:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, [[V_DIV_SCALE_F32_e64_2]], 0, [[V_FMA_F32_e64_3]], 0, [[V_DIV_SCALE_F32_e64_]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, 
implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: $vcc = PRED_COPY [[V_DIV_SCALE_F32_e64_1]] + ; GCN-NEXT: [[V_DIV_FMAS_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed [[V_FMA_F32_e64_4]], 0, [[V_FMA_F32_e64_1]], 0, [[V_FMA_F32_e64_3]], 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: [[V_DIV_FIXUP_F32_e64_:%[0-9]+]]:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed [[V_DIV_FMAS_F32_e64_]], 0, [[PRED_COPY]], 0, [[PRED_COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = PRED_COPY [[V_DIV_FIXUP_F32_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = fdiv nnan float %a, %b diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll new file mode 100644 index 0000000000000..87a56b151cf06 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; The custom CSR spills inserted during frame lowering were earlier using SP as the frame base. +; The offsets allocated for the CS objects go wrong when any local stack object has a higher +; alignment requirement than the default stack alignment for AMDGPU (either 4 or 16). The offsets +; in such cases should be computed from the newly aligned FP. Even adjusting the offsets from the SP value +; at function entry is not possible, since FP-SP can't be statically determined with dynamic stack realignment. To +; fix the problem, use FP as the frame base in the spills whenever the function has FP.
+ +define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { +; GCN-LABEL: test_stack_realign: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s16, s33 +; GCN-NEXT: s_add_i32 s33, s32, 0xfc0 +; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v42, s16, 2 +; GCN-NEXT: s_addk_i32 s32, 0x3000 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v42, s30, 0 +; GCN-NEXT: v_writelane_b32 v42, s31, 1 +; GCN-NEXT: s_getpc_b64 s[16:17] +; GCN-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:92 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:88 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:84 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:80 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:76 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:72 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:68 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, v8 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: v_readlane_b32 s30, v42, 0 +; GCN-NEXT: v_readlane_b32 s31, v42, 1 +; GCN-NEXT: v_readlane_b32 s4, v42, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_addk_i32 s32, 0xd000 +; GCN-NEXT: s_mov_b32 s33, s4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %alloca.val = alloca <8 x i32>, align 64, addrspace(5) + store volatile <8 x i32> %val, ptr addrspace(5) %alloca.val, align 64 + call void asm sideeffect "", "~{v40}" () + call void asm sideeffect "", "~{v41}" () + call void @extern_func(i32 %idx) + ret void +} + +declare void @extern_func(i32) #0 + +attributes #0 = { noinline nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir index 8135de9feba1b..3f8713002f1b4 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir @@ -1,8 +1,8 @@ # RUN: llc -march=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s # Check that we first do all vector instructions and only then change exec -# CHECK-DAG: COPY $vgpr10_vgpr11 -# CHECK-DAG: COPY $vgpr12_vgpr13 -# CHECK: $exec = COPY +# CHECK-DAG: PRED_COPY $vgpr10_vgpr11 +# CHECK-DAG: PRED_COPY $vgpr12_vgpr13 +# CHECK: $exec = PRED_COPY --- name: main @@ 
-35,10 +35,10 @@ body: | liveins: $vgpr3, $vgpr10_vgpr11, $vgpr12_vgpr13 $vcc = V_CMP_NE_U32_e64 0, killed $vgpr3, implicit $exec - $sgpr4_sgpr5 = COPY $exec, implicit-def $exec + $sgpr4_sgpr5 = PRED_COPY $exec, implicit-def $exec $sgpr6_sgpr7 = S_AND_B64 $sgpr4_sgpr5, killed $vcc, implicit-def dead $scc $sgpr4_sgpr5 = S_XOR_B64 $sgpr6_sgpr7, killed $sgpr4_sgpr5, implicit-def dead $scc - $vgpr61_vgpr62 = COPY $vgpr10_vgpr11 - $vgpr155_vgpr156 = COPY $vgpr12_vgpr13 + $vgpr61_vgpr62 = PRED_COPY $vgpr10_vgpr11 + $vgpr155_vgpr156 = PRED_COPY $vgpr12_vgpr13 $exec = S_MOV_B64_term killed $sgpr6_sgpr7 ... diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll index 3f5708f927b42..1b0347d34eff8 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll @@ -7,23 +7,23 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) ret void 
@@ -34,25 +34,25 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %d ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) ret float %ret @@ -63,23 +63,23 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic ret void @@ -90,25 +90,25 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %d ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY 
[[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll index 8670cf168fd3c..1843a221498af 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll @@ -7,15 +7,15 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, doubl ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) ret void @@ -26,19 +26,19 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: 
SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) ret double %ret @@ -49,15 +49,15 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, doubl ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic ret void @@ -68,19 +68,19 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = 
COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll index 7cb03f4e71343..428975dc4f2d9 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll @@ -6,12 +6,12 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) ret void @@ -22,13 +22,13 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %p ; GFX940: bb.0 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) + ; GFX940-NEXT: $vgpr0 = PRED_COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll index adb435db60ebe..bc4bc81b5396e 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; RUN: 
sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-- -mcpu=tonga | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s ; There are no stack objects even though flat is used by default, so @@ -52,3 +52,6 @@ entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index 9edfbefa7fcde..47478aa1057b9 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -64,45 +64,43 @@ define amdgpu_kernel void @stack_object_in_kernel_no_calls() { define amdgpu_kernel void @kernel_calls_no_stack() { ; FLAT_SCR_OPT-LABEL: kernel_calls_no_stack: ; FLAT_SCR_OPT: ; %bb.0: -; FLAT_SCR_OPT-NEXT: s_add_u32 s8, s8, s13 +; FLAT_SCR_OPT-NEXT: s_add_u32 s6, s6, s11 ; FLAT_SCR_OPT-NEXT: s_mov_b32 s32, 0 -; FLAT_SCR_OPT-NEXT: s_addc_u32 s9, s9, 0 -; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 -; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 -; FLAT_SCR_OPT-NEXT: s_mov_b64 s[8:9], s[4:5] +; FLAT_SCR_OPT-NEXT: s_addc_u32 s7, s7, 0 +; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 +; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s14, s10 +; FLAT_SCR_OPT-NEXT: s_mov_b64 s[10:11], s[4:5] ; FLAT_SCR_OPT-NEXT: s_getpc_b64 s[4:5] ; FLAT_SCR_OPT-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; FLAT_SCR_OPT-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; FLAT_SCR_OPT-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 +; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; FLAT_SCR_OPT-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s14, s12 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s13, s11 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s12, s10 -; FLAT_SCR_OPT-NEXT: s_mov_b64 s[10:11], s[6:7] -; FLAT_SCR_OPT-NEXT: v_or3_b32 v31, v0, v1, v2 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s13, s9 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s12, s8 ; FLAT_SCR_OPT-NEXT: s_mov_b64 s[4:5], s[0:1] -; FLAT_SCR_OPT-NEXT: s_mov_b64 s[6:7], s[2:3] +; FLAT_SCR_OPT-NEXT: s_mov_b64 s[8:9], s[2:3] +; FLAT_SCR_OPT-NEXT: v_or3_b32 v31, v0, v1, v2 ; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_OPT-NEXT: s_swappc_b64 s[30:31], s[16:17] +; FLAT_SCR_OPT-NEXT: s_swappc_b64 s[30:31], s[6:7] ; FLAT_SCR_OPT-NEXT: s_endpgm ; ; FLAT_SCR_ARCH-LABEL: kernel_calls_no_stack: ; FLAT_SCR_ARCH: ; %bb.0: -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s13, s9 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s12, s8 -; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[8:9], s[4:5] +; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[10:11], s[4:5] ; FLAT_SCR_ARCH-NEXT: s_getpc_b64 s[4:5] ; FLAT_SCR_ARCH-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; FLAT_SCR_ARCH-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; FLAT_SCR_ARCH-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; FLAT_SCR_ARCH-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s14, s10 -; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[10:11], s[6:7] +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s14, s8 ; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[4:5], s[0:1] -; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[6:7], s[2:3] +; FLAT_SCR_ARCH-NEXT: s_mov_b64 s[8:9], s[2:3] +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s12, s6 ; FLAT_SCR_ARCH-NEXT: v_or3_b32 v31, v0, v1, v2 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s13, s7 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s32, 0 ; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) ; 
FLAT_SCR_ARCH-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -119,21 +117,15 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s104, exec_lo -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s105 +; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 ; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 4 -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s3, 1 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s105 ; 4-byte Folded Spill -; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 0 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v72, off, s105 -; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) +; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0 +; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1 +; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s104, 4 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s104 ; 4-byte Folded Spill ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s104 +; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105 ; FLAT_SCR_OPT-NEXT: s_load_dword vcc_lo, s[0:1], 0x8 ; FLAT_SCR_OPT-NEXT: ; kill: killed $sgpr0_sgpr1 ; FLAT_SCR_OPT-NEXT: ;;#ASMSTART @@ -230,44 +222,31 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { ; FLAT_SCR_OPT-NEXT: ;;#ASMEND ; FLAT_SCR_OPT-NEXT: ;;#ASMSTART ; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: s_mov_b32 s2, exec_lo -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v2, s3 +; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s2, 4 +; FLAT_SCR_OPT-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 4 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v2, off, s3 ; 4-byte Folded Reload -; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s3, 0 -; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v2, 0 -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v2, 1 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v2, off, s3 +; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105 ; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s2 -; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v1, 0 -; FLAT_SCR_OPT-NEXT: global_store_dword v1, v0, s[0:1] +; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v1, 0 +; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v1, 1 +; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0 +; FLAT_SCR_OPT-NEXT: ; kill: killed $vgpr1 +; FLAT_SCR_OPT-NEXT: global_store_dword v2, v0, s[0:1] ; FLAT_SCR_OPT-NEXT: s_endpgm ; ; FLAT_SCR_ARCH-LABEL: test: ; FLAT_SCR_ARCH: ; %bb.0: ; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s104, exec_lo -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s105 +; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 ; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 4 -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 
v72, s3, 1 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s105 ; 4-byte Folded Spill -; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 0 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v72, off, s105 -; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) +; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0 +; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1 +; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s104, 4 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s104 ; 4-byte Folded Spill ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s104 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105 ; FLAT_SCR_ARCH-NEXT: s_load_dword vcc_lo, s[0:1], 0x8 ; FLAT_SCR_ARCH-NEXT: ; kill: killed $sgpr0_sgpr1 ; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART @@ -364,24 +343,17 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { ; FLAT_SCR_ARCH-NEXT: ;;#ASMEND ; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART ; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s2, exec_lo -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v2, s3 -; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 4 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v2, off, s3 ; 4-byte Folded Reload +; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s2, 4 +; FLAT_SCR_ARCH-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s3, 0 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105 ; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v2, 0 -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v2, 1 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v2, off, s3 -; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s2 -; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v1, 0 -; FLAT_SCR_ARCH-NEXT: global_store_dword v1, v0, s[0:1] +; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v1, 0 +; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v1, 1 +; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0 +; FLAT_SCR_ARCH-NEXT: ; kill: killed $vgpr1 +; FLAT_SCR_ARCH-NEXT: global_store_dword v2, v0, s[0:1] ; FLAT_SCR_ARCH-NEXT: s_endpgm call void asm sideeffect "", "~{s[0:7]}" () call void asm sideeffect "", "~{s[8:15]}" () diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll index d8b79f16879e5..be4b1277b360f 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,26 +1,26 @@ -; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: sed 
's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=stoney -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=CI,HSA-CI-V2,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK-V2,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-NOXNACK,HSA-VI-NOXNACK,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck -check-prefixes=VI-XNACK,HSA-VI-XNACK,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,-xnack 
-verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX9-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX9-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s -; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch -verify-machineinstrs | FileCheck -check-prefixes=GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,-xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-NOXNACK,GFX10-ARCH-FLAT,GCN %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+architected-flat-scratch,+xnack -verify-machineinstrs | FileCheck -check-prefixes=HSA-VI-XNACK,GFX10-ARCH-FLAT,GCN %s ; GCN-LABEL: {{^}}no_vcc_no_flat: @@ -166,3 +166,6 @@ entry: } attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll index 8fa91f2c024e8..58b8bd93b7c61 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll @@ -370,10 +370,10 @@ define amdgpu_kernel void @divergent_fneg_f64(double addrspace(1)* %out, double ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1 @@ -392,12 +392,12 @@ define amdgpu_kernel void @uniform_fneg_f64(double addrspace(1)* %out, double ad ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1 +; GCN: 
%[[XOR_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[XOR]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx @@ -412,10 +412,10 @@ define amdgpu_kernel void @divergent_fabs_f64(double addrspace(1)* %out, double ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1 @@ -434,12 +434,12 @@ define amdgpu_kernel void @uniform_fabs_f64(double addrspace(1)* %out, double ad ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1 +; GCN: %[[AND_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[AND]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx @@ -455,10 +455,10 @@ define amdgpu_kernel void @divergent_fneg_fabs_f64(double addrspace(1)* %out, do ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1 +; GCN: %[[HI32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]] -; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0 +; GCN: %[[LO32:[0-9]+]]:vgpr_32 = PRED_COPY %[[VREG64]].sub0 ; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1 @@ -478,12 +478,12 @@ define amdgpu_kernel void @uniform_fneg_fabs_f64(double addrspace(1)* %out, doub ; GCN-LABEL: bb.0 (%ir-block.0) ; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 ; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR -; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0 -; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1 +; GCN: %[[LO32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub0 +; GCN: %[[HI32:[0-9]+]]:sreg_32 = PRED_COPY %[[VREG64]].sub1 ; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]] -; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]] -; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1 +; GCN: 
%[[OR_PRED_COPY:[0-9]+]]:sreg_32 = PRED_COPY %[[OR]] +; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_PRED_COPY]], %subreg.sub1 %in.gep = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll index c4133d87795ec..fcf585d1ced53 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=VI,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600,FUNC %s @@ -109,3 +109,6 @@ declare float @fabsf(float) readnone declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir index a0815df56b601..1cc6ec2e7da9c 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir @@ -2,9 +2,9 @@ # CHECK: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # CHECK: %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# CHECK: %4:vgpr_32 = COPY %3 +# CHECK: %4:vgpr_32 = PRED_COPY %3 # CHECK: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# CHECK: %7:vgpr_32 = COPY %3 +# CHECK: %7:vgpr_32 = PRED_COPY %3 --- name: fold_cndmask @@ -25,7 +25,7 @@ body: | %2 = V_CNDMASK_B32_e64 0, %1, 0, %1, %0, implicit $exec %3 = IMPLICIT_DEF %4 = V_CNDMASK_B32_e64 0, %3, 0, %3, %0, implicit $exec - %5 = COPY %1 + %5 = PRED_COPY %1 %6 = V_CNDMASK_B32_e64 0, %5, 0, 0, %0, implicit $exec $vcc = IMPLICIT_DEF %7 = V_CNDMASK_B32_e32 %3, %3, implicit $exec, implicit $vcc diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir index fc2d4807f72d4..a0a99d2bee4e3 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -14,8 +14,8 @@ body: | ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed $vcc + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -37,9 +37,9 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed $vcc ; GCN-NEXT: 
[[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF1]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ADD_CO_U32_e32_1]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[PRED_COPY]], implicit [[V_ADD_CO_U32_e32_1]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -92,8 +92,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed $vcc + ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[PRED_COPY]], 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir index b23faff507e32..c048e91cdd1a8 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir @@ -6,7 +6,7 @@ # aren't made in users before the def is seen. # GCN-LABEL: name: mov_in_use_list_2x{{$}} -# GCN: %3:vgpr_32 = COPY undef %0 +# GCN: %3:vgpr_32 = PRED_COPY undef %0 name: mov_in_use_list_2x @@ -26,7 +26,7 @@ body: | bb.1: successors: %bb.2 - %2 = COPY %1 + %2 = PRED_COPY %1 %3 = V_XOR_B32_e64 killed %2, undef %0, implicit $exec S_NOP 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/fold-readlane.mir b/llvm/test/CodeGen/AMDGPU/fold-readlane.mir index b76cf45c785f4..f821191fe0fbd 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-readlane.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-readlane.mir @@ -28,7 +28,7 @@ body: | # GCN-LABEL: name: fold-imm-readfirstlane-readfirstlane{{$}} # GCN: %1:sreg_32_xm0 = S_MOV_B32 123 -# GCN: %3:sreg_32_xm0 = COPY %1 +# GCN: %3:sreg_32_xm0 = PRED_COPY %1 --- name: fold-imm-readfirstlane-readfirstlane @@ -37,29 +37,29 @@ body: | bb.0: %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec - %2:vgpr_32 = COPY %1 + %2:vgpr_32 = PRED_COPY %1 %3:sreg_32_xm0 = V_READFIRSTLANE_B32 %2, implicit $exec S_NOP 0, implicit %3 ... # GCN-LABEL: name: fold-copy-readfirstlane{{$}} -# GCN: %0:sreg_32_xm0 = COPY $sgpr10 -# GCN: %2:sreg_32_xm0 = COPY %0 +# GCN: %0:sreg_32_xm0 = PRED_COPY $sgpr10 +# GCN: %2:sreg_32_xm0 = PRED_COPY %0 --- name: fold-copy-readfirstlane tracksRegLiveness: true body: | bb.0: liveins: $sgpr10 - %0:sreg_32_xm0 = COPY $sgpr10 - %1:vgpr_32 = COPY %0 + %0:sreg_32_xm0 = PRED_COPY $sgpr10 + %1:vgpr_32 = PRED_COPY %0 %2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec S_NOP 0, implicit %2 ... # GCN-LABEL: name: no-fold-copy-readfirstlane-physreg0{{$}} -# GCN: %0:vgpr_32 = COPY $sgpr10 +# GCN: %0:vgpr_32 = PRED_COPY $sgpr10 # GCN-NEXT: %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec --- name: no-fold-copy-readfirstlane-physreg0 @@ -67,13 +67,13 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10 - %0:vgpr_32 = COPY $sgpr10 + %0:vgpr_32 = PRED_COPY $sgpr10 %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec ... 
# GCN-LABEL: name: no-fold-copy-readfirstlane-physreg1{{$}} -# GCN: $vgpr0 = COPY $sgpr10 +# GCN: $vgpr0 = PRED_COPY $sgpr10 # GCN-NEXT: %0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec --- name: no-fold-copy-readfirstlane-physreg1 @@ -81,7 +81,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10 - $vgpr0 = COPY $sgpr10 + $vgpr0 = PRED_COPY $sgpr10 %0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ... @@ -128,7 +128,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 + %0:sreg_32_xm0 = PRED_COPY $sgpr12 %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec bb.1: @@ -145,7 +145,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 + %0:sreg_32_xm0 = PRED_COPY $sgpr12 %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec $exec = S_MOV_B64_term $sgpr10_sgpr11 @@ -155,7 +155,7 @@ body: | # GCN-LABEL: name: fold-copy-readfirstlane-same-block-exec-def{{$}} # GCN: COPY -# GCN-NEXT: %1:vgpr_32 = COPY %0 +# GCN-NEXT: %1:vgpr_32 = PRED_COPY %0 # GCN-NEXT: $exec = S_MOV_B64 # GCN-NEXT: V_READFIRSTLANE_B32 --- @@ -164,8 +164,8 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:sreg_32_xm0 = COPY $sgpr12 - %1:vgpr_32 = COPY %0, implicit $exec + %0:sreg_32_xm0 = PRED_COPY $sgpr12 + %1:vgpr_32 = PRED_COPY %0, implicit $exec $exec = S_MOV_B64 $sgpr10_sgpr11 %2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec @@ -215,7 +215,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10_sgpr11, $sgpr12 - %0:vgpr_32 = COPY $sgpr12 + %0:vgpr_32 = PRED_COPY $sgpr12 $exec = S_MOV_B64 $sgpr10_sgpr11 %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec ... @@ -230,8 +230,8 @@ body: | liveins: $vgpr0, $sgpr0_sgpr1 %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec - %2:sreg_32_xm0 = COPY %1 - %3:sreg_32_xm0 = COPY %2 + %2:sreg_32_xm0 = PRED_COPY %1 + %3:sreg_32_xm0 = PRED_COPY %2 S_ENDPGM 0, implicit %3 ... @@ -249,7 +249,7 @@ body: | ... # GCN-LABEL: name: fold-imm-readlane-src1{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN: V_READLANE_B32 %0, 12, implicit $exec --- name: fold-imm-readlane-src1 @@ -257,7 +257,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:sreg_32_xm0 = S_MOV_B32 12 %2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec ... 
@@ -265,7 +265,7 @@ body: | # Constant for subreg0 # GCN-LABEL: name: fold-imm-readfirstlane-regsequence0{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec @@ -276,7 +276,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec @@ -286,7 +286,7 @@ body: | # Constant for subreg1 # GCN-LABEL: name: fold-imm-readfirstlane-regsequence1{{$}} -# GCN: %0:vgpr_32 = COPY $vgpr0 +# GCN: %0:vgpr_32 = PRED_COPY $vgpr0 # GCN-NEXT: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, killed %0, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = S_MOV_B32 0 @@ -298,7 +298,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $vgpr0 - %0:vgpr_32 = COPY $vgpr0 + %0:vgpr_32 = PRED_COPY $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, killed %0:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec @@ -348,8 +348,8 @@ body: | # FIXME: This should fold # GCN-LABEL: name: fold-copy-readfirstlane-regsequence0{{$}} -# GCN: %0:vgpr_32 = COPY $sgpr10 -# GCN-NEXT: %1:vgpr_32 = COPY $sgpr11 +# GCN: %0:vgpr_32 = PRED_COPY $sgpr10 +# GCN-NEXT: %1:vgpr_32 = PRED_COPY $sgpr11 # GCN-NEXT: %2:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, killed %1, %subreg.sub1 # GCN-NEXT: %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0, implicit $exec # GCN-NEXT: %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1, implicit $exec @@ -359,18 +359,18 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10, $sgpr11 - %0:vgpr_32 = COPY $sgpr10 - %1:vgpr_32 = COPY $sgpr11 + %0:vgpr_32 = PRED_COPY $sgpr10 + %1:vgpr_32 = PRED_COPY $sgpr11 %2:vreg_64 = REG_SEQUENCE %0:vgpr_32, %subreg.sub0, killed %1:vgpr_32, %subreg.sub1 %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64, implicit $exec %4:sgpr_32 = V_READFIRSTLANE_B32 %2.sub1:vreg_64, implicit $exec ... 
# GCN-LABEL: name: fold-copy-readfirstlane-regsequence1{{$}} -# GCN: %0:sreg_32_xm0 = COPY $sgpr10 -# GCN-NEXT: %1:sreg_32_xm0 = COPY $sgpr11 -# GCN-NEXT: %2:vgpr_32 = COPY %0 -# GCN-NEXT: %3:vgpr_32 = COPY %1 +# GCN: %0:sreg_32_xm0 = PRED_COPY $sgpr10 +# GCN-NEXT: %1:sreg_32_xm0 = PRED_COPY $sgpr11 +# GCN-NEXT: %2:vgpr_32 = PRED_COPY %0 +# GCN-NEXT: %3:vgpr_32 = PRED_COPY %1 # GCN-NEXT: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, killed %3, %subreg.sub1 # GCN-NEXT: %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0, implicit $exec # GCN-NEXT: %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1, implicit $exec @@ -380,10 +380,10 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr10, $sgpr11 - %0:sreg_32_xm0 = COPY $sgpr10 - %1:sreg_32_xm0 = COPY $sgpr11 - %2:vgpr_32 = COPY %0 - %3:vgpr_32 = COPY %1 + %0:sreg_32_xm0 = PRED_COPY $sgpr10 + %1:sreg_32_xm0 = PRED_COPY $sgpr11 + %2:vgpr_32 = PRED_COPY %0 + %3:vgpr_32 = PRED_COPY %1 %4:vreg_64 = REG_SEQUENCE %2:vgpr_32, %subreg.sub0, killed %3:vgpr_32, %subreg.sub1 %5:sgpr_32 = V_READFIRSTLANE_B32 %4.sub0:vreg_64, implicit $exec %6:sgpr_32 = V_READFIRSTLANE_B32 %4.sub1:vreg_64, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir index 72f240be6626a..ed58a7e5fc1b5 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir @@ -12,16 +12,15 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_lo @@ -40,16 +39,15 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 
0, implicit-def $exec_hi @@ -68,19 +66,18 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def $exec ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec @@ -102,15 +99,14 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo @@ -128,15 +124,14 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable 
$sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi @@ -154,18 +149,17 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir index 61542445afcf2..557d31c456fd4 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir @@ -13,16 +13,15 @@ body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0 - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def $m0 ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec @@ -46,15 +45,14 @@ body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0 - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 
0xe1 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir new file mode 100644 index 0000000000000..2d5909e3297d2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -verify-machineinstrs | FileCheck %s + +--- +name: lshl_add_u64_gep +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: lshl_add_u64_gep + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, [[REG_SEQUENCE]], implicit $exec + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[V_LSHLREV_B64_e64_]].sub0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY2]], [[V_LSHLREV_B64_e64_]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr + ; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = COPY $vgpr3 + %1:vgpr_32 = COPY $vgpr2 + %2:vgpr_32 = COPY $vgpr1 + %3:vgpr_32 = COPY $vgpr0 + %4:vreg_64_align2 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 + %5:sreg_32 = S_MOV_B32 2 + %6:vreg_64_align2 = V_LSHLREV_B64_e64 killed %5, %4, implicit $exec + %7:vgpr_32 = COPY %3 + %8:vgpr_32 = COPY %6.sub0 + %9:vgpr_32 = COPY %2 + %10:vgpr_32 = COPY %6.sub1 + %11:vgpr_32, %12:sreg_64_xexec = V_ADD_CO_U32_e64 %7, %8, 0, implicit $exec + %13:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 %9, %10, killed %12, 0, implicit $exec + %15:vreg_64_align2 = REG_SEQUENCE %11, %subreg.sub0, %13, %subreg.sub1 + %16:vgpr_32 = FLAT_LOAD_DWORD killed %15, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = COPY %16 + SI_RETURN implicit $vgpr0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll index 11c569a1bc5c6..2ded61c98b5dc 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -193,34 +193,40 @@ define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x f define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { ; SI-LABEL: fp_to_sint_i64: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dword s2, s[0:1], 0xb -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s5, 0 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dword s8, s[0:1], 0xb +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s1, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s6, s2, 0x80017 -; SI-NEXT: s_and_b32 s4, s2, 0x7fffff -; SI-NEXT: s_sub_i32 s7, 0x96, s6 -; SI-NEXT: s_bitset1_b32 s4, 23 -; SI-NEXT: s_add_i32 s8, s6, 0xffffff6a -; SI-NEXT: s_add_i32 s9, s6, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[6:7], s[4:5], s7 -; SI-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 -; SI-NEXT: s_cmp_gt_i32 s9, 23 -; SI-NEXT: s_cselect_b32 s5, s5, s7 -; SI-NEXT: s_cselect_b32 s4, s4, s6 -; SI-NEXT: s_ashr_i32 s6, s2, 31 -; SI-NEXT: s_ashr_i32 s7, s6, 31 -; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; SI-NEXT: s_sub_u32 s2, s4, s6 -; SI-NEXT: s_subb_u32 s4, s5, s7 -; SI-NEXT: s_cmp_lt_i32 s9, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, 0, s2 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, s5 -; SI-NEXT: v_mov_b32_e32 v1, s4 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_bfe_u32 s2, s8, 0x80017 +; SI-NEXT: s_and_b32 s0, s8, 0x7fffff +; SI-NEXT: s_add_i32 s3, s2, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s0, 23 +; SI-NEXT: s_sub_i32 s9, 0x96, s2 +; SI-NEXT: s_add_i32 s10, s2, 0xffffff81 +; SI-NEXT: s_lshl_b64 s[2:3], s[0:1], s3 +; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s9 +; SI-NEXT: s_cmp_gt_i32 s10, 23 +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: v_mov_b32_e32 v3, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_ashr_i32 s0, s8, 31 +; SI-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v1, s0, v1 +; SI-NEXT: v_xor_b32_e32 v0, s1, v0 +; SI-NEXT: v_mov_b32_e32 v2, s1 +; SI-NEXT: s_cmp_lt_i32 s10, 0 +; SI-NEXT: v_subrev_i32_e32 v3, vcc, s0, v1 +; SI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[0:1] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fp_to_sint_i64: @@ -233,25 +239,31 @@ define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_bfe_u32 s9, s8, 0x80017 ; VI-NEXT: s_and_b32 s4, s8, 0x7fffff -; VI-NEXT: s_sub_i32 s6, 0x96, s9 +; VI-NEXT: s_add_i32 s6, s9, 0xffffff6a ; VI-NEXT: s_bitset1_b32 s4, 23 -; VI-NEXT: s_add_i32 s10, s9, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[6:7], s[4:5], s6 -; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s10 +; VI-NEXT: s_sub_i32 s10, 0x96, s9 +; VI-NEXT: s_lshl_b64 s[6:7], s[4:5], s6 +; VI-NEXT: s_lshr_b64 s[4:5], s[4:5], s10 ; VI-NEXT: s_addk_i32 s9, 0xff81 ; VI-NEXT: s_cmp_gt_i32 s9, 23 -; VI-NEXT: s_cselect_b32 s5, s5, s7 -; VI-NEXT: s_cselect_b32 s4, s4, s6 -; VI-NEXT: s_ashr_i32 s6, s8, 31 
-; VI-NEXT: s_ashr_i32 s7, s6, 31 -; VI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; VI-NEXT: s_sub_u32 s4, s4, s6 -; VI-NEXT: s_subb_u32 s5, s5, s7 +; VI-NEXT: v_mov_b32_e32 v0, s5 +; VI-NEXT: v_mov_b32_e32 v1, s7 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s4, s8, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v1, s4, v1 +; VI-NEXT: v_xor_b32_e32 v0, s5, v0 +; VI-NEXT: v_mov_b32_e32 v2, s5 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, s4, v1 ; VI-NEXT: s_cmp_lt_i32 s9, 0 -; VI-NEXT: s_cselect_b32 s5, 0, s5 -; VI-NEXT: s_cselect_b32 s4, 0, s4 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, v3, 0, s[4:5] ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -314,53 +326,65 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s1, 0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s2, s7, 0x80017 -; SI-NEXT: s_and_b32 s0, s7, 0x7fffff -; SI-NEXT: s_sub_i32 s8, 0x96, s2 -; SI-NEXT: s_bitset1_b32 s0, 23 -; SI-NEXT: s_add_i32 s10, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 -; SI-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s9, s11, s9 -; SI-NEXT: s_cselect_b32 s8, s10, s8 -; SI-NEXT: s_ashr_i32 s10, s7, 31 -; SI-NEXT: s_ashr_i32 s11, s10, 31 -; SI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] -; SI-NEXT: s_sub_u32 s0, s8, s10 -; SI-NEXT: s_subb_u32 s7, s9, s11 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s10, 0, s7 -; SI-NEXT: s_cselect_b32 s11, 0, s0 -; SI-NEXT: s_bfe_u32 s2, s6, 0x80017 -; SI-NEXT: s_and_b32 s0, s6, 0x7fffff -; SI-NEXT: s_sub_i32 s7, 0x96, s2 -; SI-NEXT: s_bitset1_b32 s0, 23 -; SI-NEXT: s_add_i32 s12, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[8:9], s[0:1], s7 -; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s1, s1, s9 -; SI-NEXT: s_cselect_b32 s0, s0, s8 -; SI-NEXT: s_ashr_i32 s6, s6, 31 -; SI-NEXT: s_ashr_i32 s7, s6, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[6:7] -; SI-NEXT: s_sub_u32 s0, s0, s6 -; SI-NEXT: s_subb_u32 s1, s1, s7 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s6, 0, s1 -; SI-NEXT: s_cselect_b32 s7, 0, s0 ; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s9, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: v_mov_b32_e32 v2, s11 -; SI-NEXT: v_mov_b32_e32 v3, s10 -; SI-NEXT: v_mov_b32_e32 v0, s7 -; SI-NEXT: v_mov_b32_e32 v1, s6 +; SI-NEXT: s_bfe_u32 s4, s7, 0x80017 +; SI-NEXT: s_and_b32 s5, s7, 0x7fffff +; SI-NEXT: s_add_i32 s10, s4, 0xffffff6a +; SI-NEXT: s_or_b32 s8, s5, 0x800000 +; SI-NEXT: s_sub_i32 s11, 0x96, s4 +; SI-NEXT: s_add_i32 s12, s4, 0xffffff81 +; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], s10 +; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s11 +; SI-NEXT: s_cmp_gt_i32 s12, 23 +; SI-NEXT: v_mov_b32_e32 v0, s11 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: v_mov_b32_e32 v2, s10 +; SI-NEXT: v_mov_b32_e32 v3, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 
v0, v0, v1, vcc +; SI-NEXT: s_ashr_i32 s4, s7, 31 +; SI-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; SI-NEXT: s_ashr_i32 s5, s4, 31 +; SI-NEXT: v_xor_b32_e32 v1, s4, v1 +; SI-NEXT: v_xor_b32_e32 v0, s5, v0 +; SI-NEXT: v_mov_b32_e32 v2, s5 +; SI-NEXT: s_cmp_lt_i32 s12, 0 +; SI-NEXT: v_subrev_i32_e32 v1, vcc, s4, v1 +; SI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_bfe_u32 s7, s6, 0x80017 +; SI-NEXT: s_and_b32 s8, s6, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] +; SI-NEXT: s_add_i32 s4, s7, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s8, 23 +; SI-NEXT: s_sub_i32 s10, 0x96, s7 +; SI-NEXT: s_addk_i32 s7, 0xff81 +; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], s4 +; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10 +; SI-NEXT: s_cmp_gt_i32 s7, 23 +; SI-NEXT: v_mov_b32_e32 v0, s9 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: v_mov_b32_e32 v4, s8 +; SI-NEXT: v_mov_b32_e32 v5, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_ashr_i32 s4, s6, 31 +; SI-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc +; SI-NEXT: s_ashr_i32 s5, s4, 31 +; SI-NEXT: v_xor_b32_e32 v1, s4, v1 +; SI-NEXT: v_xor_b32_e32 v0, s5, v0 +; SI-NEXT: v_mov_b32_e32 v4, s5 +; SI-NEXT: s_cmp_lt_i32 s7, 0 +; SI-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1 +; SI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -374,47 +398,59 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f ; VI-NEXT: s_bfe_u32 s12, s7, 0x80017 ; VI-NEXT: s_and_b32 s4, s7, 0x7fffff ; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_sub_i32 s8, 0x96, s12 +; VI-NEXT: s_add_i32 s8, s12, 0xffffff6a ; VI-NEXT: s_bitset1_b32 s4, 23 ; VI-NEXT: s_mov_b32 s5, 0 -; VI-NEXT: s_add_i32 s10, s12, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 -; VI-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 +; VI-NEXT: s_sub_i32 s10, 0x96, s12 +; VI-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 +; VI-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 ; VI-NEXT: s_addk_i32 s12, 0xff81 ; VI-NEXT: s_cmp_gt_i32 s12, 23 -; VI-NEXT: s_cselect_b32 s9, s11, s9 -; VI-NEXT: s_cselect_b32 s8, s10, s8 -; VI-NEXT: s_ashr_i32 s10, s7, 31 -; VI-NEXT: s_ashr_i32 s11, s10, 31 -; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] -; VI-NEXT: s_sub_u32 s4, s8, s10 -; VI-NEXT: s_subb_u32 s7, s9, s11 +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: v_mov_b32_e32 v1, s9 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s10 +; VI-NEXT: v_mov_b32_e32 v2, s8 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s4, s7, 31 +; VI-NEXT: s_ashr_i32 s7, s4, 31 +; VI-NEXT: v_xor_b32_e32 v1, s4, v1 +; VI-NEXT: v_xor_b32_e32 v0, s7, v0 +; VI-NEXT: v_mov_b32_e32 v2, s7 +; VI-NEXT: v_subrev_u32_e32 v1, vcc, s4, v1 ; VI-NEXT: s_cmp_lt_i32 s12, 0 -; VI-NEXT: s_cselect_b32 s10, 0, s7 -; VI-NEXT: s_cselect_b32 s11, 0, s4 +; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; VI-NEXT: s_cselect_b64 s[8:9], -1, 0 ; VI-NEXT: s_bfe_u32 s7, s6, 0x80017 ; VI-NEXT: s_and_b32 s4, s6, 0x7fffff -; VI-NEXT: s_sub_i32 s8, 0x96, s7 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9] +; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[8:9] +; VI-NEXT: s_add_i32 s8, s7, 0xffffff6a ; VI-NEXT: s_bitset1_b32 s4, 23 -; VI-NEXT: s_add_i32 s12, s7, 0xffffff6a -; VI-NEXT: 
s_lshr_b64 s[8:9], s[4:5], s8 -; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s12 -; VI-NEXT: s_add_i32 s12, s7, 0xffffff81 -; VI-NEXT: s_cmp_gt_i32 s12, 23 -; VI-NEXT: s_cselect_b32 s5, s5, s9 -; VI-NEXT: s_cselect_b32 s4, s4, s8 -; VI-NEXT: s_ashr_i32 s6, s6, 31 -; VI-NEXT: s_ashr_i32 s7, s6, 31 -; VI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; VI-NEXT: s_sub_u32 s4, s4, s6 -; VI-NEXT: s_subb_u32 s5, s5, s7 -; VI-NEXT: s_cmp_lt_i32 s12, 0 -; VI-NEXT: s_cselect_b32 s5, 0, s5 -; VI-NEXT: s_cselect_b32 s4, 0, s4 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s11 -; VI-NEXT: v_mov_b32_e32 v3, s10 +; VI-NEXT: s_sub_i32 s10, 0x96, s7 +; VI-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 +; VI-NEXT: s_lshr_b64 s[4:5], s[4:5], s10 +; VI-NEXT: s_addk_i32 s7, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s7, 23 +; VI-NEXT: v_mov_b32_e32 v0, s5 +; VI-NEXT: v_mov_b32_e32 v1, s9 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_mov_b32_e32 v4, s8 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; VI-NEXT: s_ashr_i32 s4, s6, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v1, s4, v1 +; VI-NEXT: v_xor_b32_e32 v0, s5, v0 +; VI-NEXT: v_mov_b32_e32 v4, s5 +; VI-NEXT: v_subrev_u32_e32 v5, vcc, s4, v1 +; VI-NEXT: s_cmp_lt_i32 s7, 0 +; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -509,98 +545,122 @@ define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x f define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { ; SI-LABEL: fp_to_sint_v4i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s9, 0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s2, s5, 0x80017 -; SI-NEXT: s_and_b32 s8, s5, 0x7fffff -; SI-NEXT: s_sub_i32 s10, 0x96, s2 +; SI-NEXT: s_bfe_u32 s10, s1, 0x80017 +; SI-NEXT: s_and_b32 s8, s1, 0x7fffff +; SI-NEXT: s_add_i32 s11, s10, 0xffffff6a ; SI-NEXT: s_bitset1_b32 s8, 23 -; SI-NEXT: s_add_i32 s12, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], s12 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s11, s13, s11 -; SI-NEXT: s_cselect_b32 s10, s12, s10 -; SI-NEXT: s_ashr_i32 s12, s5, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] -; SI-NEXT: s_sub_u32 s5, s10, s12 -; SI-NEXT: s_subb_u32 s8, s11, s13 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s14, 0, s8 -; SI-NEXT: s_cselect_b32 s15, 0, s5 -; SI-NEXT: s_bfe_u32 s2, s4, 0x80017 -; SI-NEXT: s_and_b32 s5, s4, 0x7fffff -; SI-NEXT: s_sub_i32 s10, 0x96, s2 -; SI-NEXT: s_or_b32 s8, s5, 0x800000 -; SI-NEXT: s_add_i32 s5, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], s5 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s11, s13, s11 -; SI-NEXT: s_cselect_b32 s10, s12, s10 -; SI-NEXT: s_ashr_i32 s4, s4, 31 -; SI-NEXT: s_ashr_i32 s5, s4, 31 -; SI-NEXT: s_xor_b64 s[10:11], 
s[10:11], s[4:5] -; SI-NEXT: s_sub_u32 s4, s10, s4 -; SI-NEXT: s_subb_u32 s5, s11, s5 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s12, 0, s5 -; SI-NEXT: s_cselect_b32 s13, 0, s4 -; SI-NEXT: s_bfe_u32 s2, s7, 0x80017 -; SI-NEXT: s_and_b32 s4, s7, 0x7fffff -; SI-NEXT: s_sub_i32 s5, 0x96, s2 -; SI-NEXT: s_or_b32 s8, s4, 0x800000 -; SI-NEXT: s_add_i32 s10, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[4:5], s[8:9], s5 +; SI-NEXT: s_sub_i32 s12, 0x96, s10 +; SI-NEXT: s_add_i32 s14, s10, 0xffffff81 +; SI-NEXT: s_lshl_b64 s[10:11], s[8:9], s11 +; SI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 +; SI-NEXT: s_cmp_gt_i32 s14, 23 +; SI-NEXT: v_mov_b32_e32 v0, s13 +; SI-NEXT: v_mov_b32_e32 v1, s11 +; SI-NEXT: v_mov_b32_e32 v2, s12 +; SI-NEXT: v_mov_b32_e32 v3, s10 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_ashr_i32 s1, s1, 31 +; SI-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; SI-NEXT: s_ashr_i32 s8, s1, 31 +; SI-NEXT: v_xor_b32_e32 v1, s1, v1 +; SI-NEXT: v_xor_b32_e32 v0, s8, v0 +; SI-NEXT: v_mov_b32_e32 v2, s8 +; SI-NEXT: s_cmp_lt_i32 s14, 0 +; SI-NEXT: v_subrev_i32_e32 v1, vcc, s1, v1 +; SI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; SI-NEXT: s_cselect_b64 s[10:11], -1, 0 +; SI-NEXT: s_bfe_u32 s1, s0, 0x80017 +; SI-NEXT: s_and_b32 s8, s0, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[10:11] +; SI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[10:11] +; SI-NEXT: s_add_i32 s10, s1, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s8, 23 +; SI-NEXT: s_sub_i32 s12, 0x96, s1 +; SI-NEXT: s_addk_i32 s1, 0xff81 ; SI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s5, s11, s5 -; SI-NEXT: s_cselect_b32 s4, s10, s4 -; SI-NEXT: s_ashr_i32 s10, s7, 31 -; SI-NEXT: s_ashr_i32 s11, s10, 31 -; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] -; SI-NEXT: s_sub_u32 s4, s4, s10 -; SI-NEXT: s_subb_u32 s5, s5, s11 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s10, 0, s5 -; SI-NEXT: s_cselect_b32 s11, 0, s4 -; SI-NEXT: s_bfe_u32 s2, s6, 0x80017 -; SI-NEXT: s_and_b32 s4, s6, 0x7fffff -; SI-NEXT: s_sub_i32 s5, 0x96, s2 -; SI-NEXT: s_or_b32 s8, s4, 0x800000 -; SI-NEXT: s_add_i32 s7, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[4:5], s[8:9], s5 -; SI-NEXT: s_lshl_b64 s[8:9], s[8:9], s7 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s5, s9, s5 -; SI-NEXT: s_cselect_b32 s4, s8, s4 -; SI-NEXT: s_ashr_i32 s6, s6, 31 -; SI-NEXT: s_ashr_i32 s7, s6, 31 -; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; SI-NEXT: s_sub_u32 s4, s4, s6 -; SI-NEXT: s_subb_u32 s5, s5, s7 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s5, 0, s5 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v2, s15 -; SI-NEXT: v_mov_b32_e32 v3, s14 +; SI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 +; SI-NEXT: s_cmp_gt_i32 s1, 23 ; SI-NEXT: v_mov_b32_e32 v0, s13 -; SI-NEXT: v_mov_b32_e32 v1, s12 -; SI-NEXT: v_mov_b32_e32 v6, s11 -; SI-NEXT: v_mov_b32_e32 v7, s10 -; SI-NEXT: v_mov_b32_e32 v4, s4 -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: v_mov_b32_e32 v1, s11 +; SI-NEXT: v_mov_b32_e32 v4, s12 +; SI-NEXT: v_mov_b32_e32 v5, s10 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_ashr_i32 s0, s0, 31 +; SI-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc +; SI-NEXT: s_ashr_i32 s8, s0, 31 +; SI-NEXT: v_xor_b32_e32 
v1, s0, v1 +; SI-NEXT: v_xor_b32_e32 v0, s8, v0 +; SI-NEXT: v_mov_b32_e32 v4, s8 +; SI-NEXT: s_cmp_lt_i32 s1, 0 +; SI-NEXT: v_subrev_i32_e32 v5, vcc, s0, v1 +; SI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_bfe_u32 s10, s3, 0x80017 +; SI-NEXT: s_and_b32 s8, s3, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[0:1] +; SI-NEXT: s_add_i32 s0, s10, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s8, 23 +; SI-NEXT: s_sub_i32 s11, 0x96, s10 +; SI-NEXT: s_add_i32 s12, s10, 0xffffff81 +; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 +; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s11 +; SI-NEXT: s_cmp_gt_i32 s12, 23 +; SI-NEXT: v_mov_b32_e32 v4, s11 +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: v_mov_b32_e32 v6, s10 +; SI-NEXT: v_mov_b32_e32 v7, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: s_ashr_i32 s0, s3, 31 +; SI-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v5, s0, v5 +; SI-NEXT: v_xor_b32_e32 v4, s1, v4 +; SI-NEXT: v_mov_b32_e32 v6, s1 +; SI-NEXT: s_cmp_lt_i32 s12, 0 +; SI-NEXT: v_subrev_i32_e32 v5, vcc, s0, v5 +; SI-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_bfe_u32 s3, s2, 0x80017 +; SI-NEXT: s_and_b32 s8, s2, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v6, v5, 0, s[0:1] +; SI-NEXT: s_add_i32 s0, s3, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s8, 23 +; SI-NEXT: s_sub_i32 s10, 0x96, s3 +; SI-NEXT: s_addk_i32 s3, 0xff81 +; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 +; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10 +; SI-NEXT: s_cmp_gt_i32 s3, 23 +; SI-NEXT: v_mov_b32_e32 v4, s9 +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: v_mov_b32_e32 v8, s8 +; SI-NEXT: v_mov_b32_e32 v9, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: s_ashr_i32 s0, s2, 31 +; SI-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v5, s0, v5 +; SI-NEXT: v_xor_b32_e32 v4, s1, v4 +; SI-NEXT: v_mov_b32_e32 v8, s1 +; SI-NEXT: s_cmp_lt_i32 s3, 0 +; SI-NEXT: v_subrev_i32_e32 v9, vcc, s0, v5 +; SI-NEXT: v_subb_u32_e32 v4, vcc, v4, v8, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[0:1] +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fp_to_sint_v4i64: @@ -613,90 +673,113 @@ define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x f ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_bfe_u32 s14, s5, 0x80017 ; VI-NEXT: s_and_b32 s8, s5, 0x7fffff -; VI-NEXT: s_sub_i32 s10, 0x96, s14 +; VI-NEXT: s_add_i32 s10, s14, 0xffffff6a ; VI-NEXT: s_bitset1_b32 s8, 23 -; VI-NEXT: s_add_i32 s12, s14, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; VI-NEXT: s_lshl_b64 s[12:13], s[8:9], s12 +; VI-NEXT: s_sub_i32 s12, 0x96, s14 +; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 +; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 ; VI-NEXT: s_addk_i32 s14, 0xff81 ; VI-NEXT: s_cmp_gt_i32 s14, 23 -; VI-NEXT: s_cselect_b32 s11, s13, s11 -; VI-NEXT: s_cselect_b32 s10, s12, s10 -; VI-NEXT: s_ashr_i32 s12, s5, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] -; VI-NEXT: s_sub_u32 s5, s10, s12 -; VI-NEXT: s_subb_u32 s8, s11, s13 +; VI-NEXT: v_mov_b32_e32 v0, 
s13 +; VI-NEXT: v_mov_b32_e32 v1, s11 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s12 +; VI-NEXT: v_mov_b32_e32 v2, s10 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s5, s5, 31 +; VI-NEXT: s_ashr_i32 s8, s5, 31 +; VI-NEXT: v_xor_b32_e32 v1, s5, v1 +; VI-NEXT: v_xor_b32_e32 v0, s8, v0 +; VI-NEXT: v_mov_b32_e32 v2, s8 +; VI-NEXT: v_subrev_u32_e32 v1, vcc, s5, v1 ; VI-NEXT: s_cmp_lt_i32 s14, 0 -; VI-NEXT: s_cselect_b32 s14, 0, s8 -; VI-NEXT: s_cselect_b32 s15, 0, s5 +; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; VI-NEXT: s_cselect_b64 s[10:11], -1, 0 ; VI-NEXT: s_bfe_u32 s5, s4, 0x80017 ; VI-NEXT: s_and_b32 s8, s4, 0x7fffff -; VI-NEXT: s_sub_i32 s10, 0x96, s5 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[10:11] +; VI-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[10:11] +; VI-NEXT: s_add_i32 s10, s5, 0xffffff6a ; VI-NEXT: s_bitset1_b32 s8, 23 -; VI-NEXT: s_add_i32 s12, s5, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; VI-NEXT: s_lshl_b64 s[12:13], s[8:9], s12 -; VI-NEXT: s_add_i32 s8, s5, 0xffffff81 -; VI-NEXT: s_cmp_gt_i32 s8, 23 -; VI-NEXT: s_cselect_b32 s11, s13, s11 -; VI-NEXT: s_cselect_b32 s10, s12, s10 +; VI-NEXT: s_sub_i32 s12, 0x96, s5 +; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 +; VI-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 +; VI-NEXT: s_addk_i32 s5, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s5, 23 +; VI-NEXT: v_mov_b32_e32 v0, s13 +; VI-NEXT: v_mov_b32_e32 v1, s11 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s12 +; VI-NEXT: v_mov_b32_e32 v4, s10 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; VI-NEXT: s_ashr_i32 s4, s4, 31 -; VI-NEXT: s_ashr_i32 s5, s4, 31 -; VI-NEXT: s_xor_b64 s[10:11], s[10:11], s[4:5] -; VI-NEXT: s_sub_u32 s4, s10, s4 -; VI-NEXT: s_subb_u32 s5, s11, s5 -; VI-NEXT: s_cmp_lt_i32 s8, 0 -; VI-NEXT: s_cselect_b32 s12, 0, s5 -; VI-NEXT: s_cselect_b32 s13, 0, s4 -; VI-NEXT: s_bfe_u32 s16, s7, 0x80017 +; VI-NEXT: s_ashr_i32 s8, s4, 31 +; VI-NEXT: v_xor_b32_e32 v1, s4, v1 +; VI-NEXT: v_xor_b32_e32 v0, s8, v0 +; VI-NEXT: v_mov_b32_e32 v4, s8 +; VI-NEXT: v_subrev_u32_e32 v5, vcc, s4, v1 +; VI-NEXT: s_cmp_lt_i32 s5, 0 +; VI-NEXT: v_subb_u32_e32 v0, vcc, v0, v4, vcc +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] +; VI-NEXT: s_bfe_u32 s12, s7, 0x80017 ; VI-NEXT: s_and_b32 s5, s7, 0x7fffff -; VI-NEXT: s_sub_i32 s4, 0x96, s16 +; VI-NEXT: s_add_i32 s4, s12, 0xffffff6a ; VI-NEXT: s_or_b32 s8, s5, 0x800000 -; VI-NEXT: s_add_i32 s10, s16, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[4:5], s[8:9], s4 -; VI-NEXT: s_lshl_b64 s[10:11], s[8:9], s10 -; VI-NEXT: s_addk_i32 s16, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s16, 23 -; VI-NEXT: s_cselect_b32 s5, s11, s5 -; VI-NEXT: s_cselect_b32 s4, s10, s4 -; VI-NEXT: s_ashr_i32 s10, s7, 31 -; VI-NEXT: s_ashr_i32 s11, s10, 31 -; VI-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11] -; VI-NEXT: s_sub_u32 s4, s4, s10 -; VI-NEXT: s_subb_u32 s5, s5, s11 -; VI-NEXT: s_cmp_lt_i32 s16, 0 -; VI-NEXT: s_cselect_b32 s10, 0, s5 -; VI-NEXT: s_cselect_b32 s11, 0, s4 +; VI-NEXT: s_sub_i32 s10, 0x96, s12 +; VI-NEXT: s_lshl_b64 s[4:5], s[8:9], s4 +; VI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 +; VI-NEXT: s_addk_i32 s12, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s12, 23 +; VI-NEXT: v_mov_b32_e32 v4, s11 +; VI-NEXT: v_mov_b32_e32 v5, s5 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; VI-NEXT: v_mov_b32_e32 v5, s10 +; VI-NEXT: 
v_mov_b32_e32 v6, s4 +; VI-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; VI-NEXT: s_ashr_i32 s4, s7, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v5, s4, v5 +; VI-NEXT: v_xor_b32_e32 v4, s5, v4 +; VI-NEXT: v_mov_b32_e32 v6, s5 +; VI-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 +; VI-NEXT: s_cmp_lt_i32 s12, 0 +; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v6, vcc +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v7, v4, 0, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, s[4:5] ; VI-NEXT: s_bfe_u32 s7, s6, 0x80017 ; VI-NEXT: s_and_b32 s5, s6, 0x7fffff -; VI-NEXT: s_sub_i32 s4, 0x96, s7 +; VI-NEXT: s_add_i32 s4, s7, 0xffffff6a ; VI-NEXT: s_or_b32 s8, s5, 0x800000 -; VI-NEXT: s_add_i32 s16, s7, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[4:5], s[8:9], s4 -; VI-NEXT: s_lshl_b64 s[8:9], s[8:9], s16 -; VI-NEXT: s_add_i32 s16, s7, 0xffffff81 -; VI-NEXT: s_cmp_gt_i32 s16, 23 -; VI-NEXT: s_cselect_b32 s5, s9, s5 -; VI-NEXT: s_cselect_b32 s4, s8, s4 -; VI-NEXT: s_ashr_i32 s6, s6, 31 -; VI-NEXT: s_ashr_i32 s7, s6, 31 -; VI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; VI-NEXT: s_sub_u32 s4, s4, s6 -; VI-NEXT: s_subb_u32 s5, s5, s7 -; VI-NEXT: s_cmp_lt_i32 s16, 0 -; VI-NEXT: s_cselect_b32 s5, 0, s5 -; VI-NEXT: s_cselect_b32 s4, 0, s4 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_mov_b32_e32 v2, s11 -; VI-NEXT: v_mov_b32_e32 v3, s10 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; VI-NEXT: s_nop 0 -; VI-NEXT: v_mov_b32_e32 v0, s13 -; VI-NEXT: v_mov_b32_e32 v1, s12 -; VI-NEXT: v_mov_b32_e32 v2, s15 -; VI-NEXT: v_mov_b32_e32 v3, s14 +; VI-NEXT: s_sub_i32 s10, 0x96, s7 +; VI-NEXT: s_lshl_b64 s[4:5], s[8:9], s4 +; VI-NEXT: s_lshr_b64 s[8:9], s[8:9], s10 +; VI-NEXT: s_addk_i32 s7, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s7, 23 +; VI-NEXT: v_mov_b32_e32 v4, s9 +; VI-NEXT: v_mov_b32_e32 v5, s5 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; VI-NEXT: v_mov_b32_e32 v5, s8 +; VI-NEXT: v_mov_b32_e32 v8, s4 +; VI-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc +; VI-NEXT: s_ashr_i32 s4, s6, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v5, s4, v5 +; VI-NEXT: v_xor_b32_e32 v4, s5, v4 +; VI-NEXT: v_mov_b32_e32 v8, s5 +; VI-NEXT: v_subrev_u32_e32 v9, vcc, s4, v5 +; VI-NEXT: s_cmp_lt_i32 s7, 0 +; VI-NEXT: v_subb_u32_e32 v4, vcc, v4, v8, vcc +; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[4:5] +; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll index f28c8222e12d3..ce4e882b24f76 100644 --- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -153,121 +153,131 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float % ; SI-LABEL: fp_to_uint_f32_to_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s6, s[0:1], 0xb +; SI-NEXT: s_load_dword s8, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0x5f000000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s1, 0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v4, 0x5f000000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_sub_f32_e32 v1, s6, v0 -; SI-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-NEXT: v_readfirstlane_b32 s10, v1 -; SI-NEXT: s_bfe_u32 s2, s10, 0x80017 -; SI-NEXT: s_and_b32 
s0, s10, 0x7fffff -; SI-NEXT: s_sub_i32 s3, 0x96, s2 -; SI-NEXT: s_bitset1_b32 s0, 23 -; SI-NEXT: s_add_i32 s8, s2, 0xffffff6a -; SI-NEXT: s_add_i32 s11, s2, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[2:3], s[0:1], s3 -; SI-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 -; SI-NEXT: s_cmp_gt_i32 s11, 23 -; SI-NEXT: s_cselect_b32 s3, s9, s3 -; SI-NEXT: s_cselect_b32 s2, s8, s2 -; SI-NEXT: s_ashr_i32 s8, s10, 31 -; SI-NEXT: s_ashr_i32 s9, s8, 31 -; SI-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] -; SI-NEXT: s_sub_u32 s0, s2, s8 -; SI-NEXT: s_subb_u32 s2, s3, s9 -; SI-NEXT: s_cmp_lt_i32 s11, 0 -; SI-NEXT: s_cselect_b32 s8, 0, s2 -; SI-NEXT: s_cselect_b32 s9, 0, s0 -; SI-NEXT: s_bfe_u32 s2, s6, 0x80017 -; SI-NEXT: s_and_b32 s0, s6, 0x7fffff -; SI-NEXT: s_sub_i32 s3, 0x96, s2 +; SI-NEXT: s_bfe_u32 s2, s8, 0x80017 +; SI-NEXT: s_and_b32 s0, s8, 0x7fffff +; SI-NEXT: v_sub_f32_e32 v0, s8, v4 +; SI-NEXT: s_add_i32 s3, s2, 0xffffff6a ; SI-NEXT: s_bitset1_b32 s0, 23 -; SI-NEXT: s_add_i32 s10, s2, 0xffffff6a -; SI-NEXT: s_add_i32 s11, s2, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[2:3], s[0:1], s3 -; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s10 -; SI-NEXT: s_cmp_gt_i32 s11, 23 -; SI-NEXT: s_cselect_b32 s1, s1, s3 -; SI-NEXT: s_cselect_b32 s0, s0, s2 -; SI-NEXT: s_ashr_i32 s2, s6, 31 -; SI-NEXT: s_ashr_i32 s3, s2, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; SI-NEXT: s_sub_u32 s0, s0, s2 -; SI-NEXT: s_subb_u32 s1, s1, s3 -; SI-NEXT: s_cmp_lt_i32 s11, 0 -; SI-NEXT: s_cselect_b32 s2, 0, s1 -; SI-NEXT: s_cselect_b32 s3, 0, s0 -; SI-NEXT: s_and_b64 s[0:1], vcc, exec -; SI-NEXT: s_cselect_b32 s3, s3, s9 -; SI-NEXT: s_xor_b32 s6, s8, 0x80000000 -; SI-NEXT: s_and_b64 s[0:1], vcc, exec -; SI-NEXT: s_cselect_b32 s0, s2, s6 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, s3 -; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_sub_i32 s9, 0x96, s2 +; SI-NEXT: s_add_i32 s10, s2, 0xffffff81 +; SI-NEXT: v_bfe_u32 v2, v0, 23, 8 +; SI-NEXT: v_and_b32_e32 v3, 0x7fffff, v0 +; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; SI-NEXT: s_lshl_b64 s[2:3], s[0:1], s3 +; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s9 +; SI-NEXT: v_add_i32_e32 v6, vcc, 0xffffff6a, v2 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v3 +; SI-NEXT: v_sub_i32_e32 v7, vcc, 0x96, v2 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xffffff81, v2 +; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; SI-NEXT: s_cmp_gt_i32 s10, 23 +; SI-NEXT: v_mov_b32_e32 v10, s1 +; SI-NEXT: v_mov_b32_e32 v11, s3 +; SI-NEXT: v_mov_b32_e32 v12, s0 +; SI-NEXT: v_mov_b32_e32 v13, s2 +; SI-NEXT: v_lshl_b64 v[2:3], v[0:1], v6 +; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], v7 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v6, v10, v11, vcc +; SI-NEXT: s_ashr_i32 s2, s8, 31 +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 23, v8 +; SI-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_cndmask_b32_e32 v2, v12, v13, vcc +; SI-NEXT: s_ashr_i32 s0, s2, 31 +; SI-NEXT: v_xor_b32_e32 v0, v0, v5 +; SI-NEXT: v_xor_b32_e32 v1, v1, v9 +; SI-NEXT: v_xor_b32_e32 v2, s2, v2 +; SI-NEXT: v_xor_b32_e32 v3, s0, v6 +; SI-NEXT: v_mov_b32_e32 v6, s0 +; SI-NEXT: s_cmp_lt_i32 s10, 0 +; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; SI-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 +; SI-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v3, v3, 0, 
s[0:1] +; SI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s8, v4 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fp_to_uint_f32_to_i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s10, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x5f000000 -; VI-NEXT: s_mov_b32 s5, 0 -; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_load_dword s8, s[0:1], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s1, 0 +; VI-NEXT: v_mov_b32_e32 v6, 0x5f000000 +; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_sub_f32_e32 v1, s10, v0 -; VI-NEXT: v_readfirstlane_b32 s11, v1 -; VI-NEXT: s_bfe_u32 s12, s11, 0x80017 -; VI-NEXT: s_and_b32 s4, s11, 0x7fffff -; VI-NEXT: s_sub_i32 s6, 0x96, s12 -; VI-NEXT: s_bitset1_b32 s4, 23 -; VI-NEXT: s_add_i32 s8, s12, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[6:7], s[4:5], s6 -; VI-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 -; VI-NEXT: s_addk_i32 s12, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s12, 23 -; VI-NEXT: s_cselect_b32 s7, s9, s7 -; VI-NEXT: s_cselect_b32 s6, s8, s6 -; VI-NEXT: s_ashr_i32 s8, s11, 31 -; VI-NEXT: s_ashr_i32 s9, s8, 31 -; VI-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9] -; VI-NEXT: s_sub_u32 s4, s6, s8 -; VI-NEXT: s_subb_u32 s6, s7, s9 -; VI-NEXT: s_cmp_lt_i32 s12, 0 -; VI-NEXT: s_cselect_b32 s8, 0, s6 -; VI-NEXT: s_cselect_b32 s9, 0, s4 -; VI-NEXT: s_bfe_u32 s11, s10, 0x80017 -; VI-NEXT: s_and_b32 s4, s10, 0x7fffff -; VI-NEXT: s_sub_i32 s6, 0x96, s11 -; VI-NEXT: s_bitset1_b32 s4, 23 -; VI-NEXT: s_add_i32 s12, s11, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[6:7], s[4:5], s6 -; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s12 -; VI-NEXT: s_addk_i32 s11, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s11, 23 -; VI-NEXT: s_cselect_b32 s5, s5, s7 -; VI-NEXT: s_cselect_b32 s4, s4, s6 -; VI-NEXT: s_ashr_i32 s6, s10, 31 -; VI-NEXT: s_ashr_i32 s7, s6, 31 -; VI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] -; VI-NEXT: s_sub_u32 s4, s4, s6 -; VI-NEXT: s_subb_u32 s5, s5, s7 -; VI-NEXT: s_cmp_lt_i32 s11, 0 -; VI-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; VI-NEXT: s_cselect_b32 s6, 0, s5 -; VI-NEXT: s_cselect_b32 s7, 0, s4 -; VI-NEXT: s_and_b64 s[4:5], vcc, exec -; VI-NEXT: s_cselect_b32 s7, s7, s9 -; VI-NEXT: s_xor_b32 s8, s8, 0x80000000 -; VI-NEXT: s_and_b64 s[4:5], vcc, exec -; VI-NEXT: s_cselect_b32 s4, s6, s8 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: v_mov_b32_e32 v0, s7 -; VI-NEXT: v_mov_b32_e32 v1, s4 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: s_bfe_u32 s9, s8, 0x80017 +; VI-NEXT: s_and_b32 s0, s8, 0x7fffff +; VI-NEXT: s_add_i32 s2, s9, 0xffffff6a +; VI-NEXT: s_bitset1_b32 s0, 23 +; VI-NEXT: s_sub_i32 s10, 0x96, s9 +; VI-NEXT: s_lshl_b64 s[2:3], s[0:1], s2 +; VI-NEXT: s_lshr_b64 s[0:1], s[0:1], s10 +; VI-NEXT: s_addk_i32 s9, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s9, 23 +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s0, s8, 31 +; VI-NEXT: s_ashr_i32 s1, s0, 31 +; VI-NEXT: v_xor_b32_e32 v1, s0, v1 +; VI-NEXT: v_xor_b32_e32 v0, s1, v0 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, s0, v1 +; VI-NEXT: v_sub_f32_e32 v7, s8, v6 +; VI-NEXT: v_subb_u32_e32 v4, vcc, v0, v2, vcc +; VI-NEXT: s_cmp_lt_i32 s9, 0 +; VI-NEXT: 
v_bfe_u32 v8, v7, 23, 8 +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v7 +; VI-NEXT: v_mov_b32_e32 v1, 0 +; VI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; VI-NEXT: v_add_u32_e32 v2, vcc, 0xffffff6a, v8 +; VI-NEXT: v_or_b32_e32 v0, 0x800000, v0 +; VI-NEXT: v_sub_u32_e32 v9, vcc, 0x96, v8 +; VI-NEXT: v_cndmask_b32_e64 v5, v3, 0, s[2:3] +; VI-NEXT: v_lshlrev_b64 v[2:3], v2, v[0:1] +; VI-NEXT: v_lshrrev_b64 v[0:1], v9, v[0:1] +; VI-NEXT: v_add_u32_e32 v8, vcc, 0xffffff81, v8 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v8 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v7 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NEXT: v_xor_b32_e32 v0, v0, v2 +; VI-NEXT: v_xor_b32_e32 v1, v1, v3 +; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; VI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v6 +; VI-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[2:3] +; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1] +; VI-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; EG-LABEL: fp_to_uint_f32_to_i64: @@ -326,220 +336,238 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float % define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; SI-LABEL: fp_to_uint_v2f32_to_v2i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0x5f000000 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_mov_b32 s9, 0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v6, 0x5f000000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_sub_f32_e32 v1, s7, v0 -; SI-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; SI-NEXT: v_sub_f32_e32 v2, s6, v0 -; SI-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 -; SI-NEXT: v_readfirstlane_b32 s2, v1 -; SI-NEXT: v_readfirstlane_b32 s14, v2 -; SI-NEXT: s_bfe_u32 s10, s2, 0x80017 -; SI-NEXT: s_and_b32 s8, s2, 0x7fffff -; SI-NEXT: s_sub_i32 s11, 0x96, s10 -; SI-NEXT: s_bitset1_b32 s8, 23 -; SI-NEXT: s_add_i32 s12, s10, 0xffffff6a -; SI-NEXT: s_add_i32 s15, s10, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s11 -; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], s12 -; SI-NEXT: s_cmp_gt_i32 s15, 23 -; SI-NEXT: s_cselect_b32 s11, s13, s11 -; SI-NEXT: s_cselect_b32 s10, s12, s10 -; SI-NEXT: s_ashr_i32 s12, s2, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] -; SI-NEXT: s_sub_u32 s2, s10, s12 -; SI-NEXT: s_subb_u32 s8, s11, s13 -; SI-NEXT: s_cmp_lt_i32 s15, 0 -; SI-NEXT: s_cselect_b32 s15, 0, s8 -; SI-NEXT: s_cselect_b32 s2, 0, s2 -; SI-NEXT: s_bfe_u32 s10, s7, 0x80017 -; SI-NEXT: s_and_b32 s8, s7, 0x7fffff -; SI-NEXT: s_sub_i32 s11, 0x96, s10 -; SI-NEXT: s_bitset1_b32 s8, 23 -; SI-NEXT: s_add_i32 s12, s10, 0xffffff6a -; SI-NEXT: s_add_i32 s16, s10, 0xffffff81 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_bfe_u32 s0, s3, 0x80017 +; SI-NEXT: s_and_b32 s1, s3, 0x7fffff +; SI-NEXT: v_sub_f32_e32 v0, s3, v6 +; SI-NEXT: v_sub_f32_e32 v2, s2, v6 +; SI-NEXT: s_add_i32 s10, s0, 0xffffff6a +; SI-NEXT: s_or_b32 s8, s1, 0x800000 +; SI-NEXT: s_sub_i32 s11, 0x96, s0 +; SI-NEXT: s_add_i32 s12, s0, 0xffffff81 +; 
SI-NEXT: v_bfe_u32 v3, v0, 23, 8 +; SI-NEXT: v_and_b32_e32 v4, 0x7fffff, v0 +; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v0 +; SI-NEXT: v_bfe_u32 v5, v2, 23, 8 +; SI-NEXT: v_and_b32_e32 v8, 0x7fffff, v2 +; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v2 +; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s10 ; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s11 -; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], s12 -; SI-NEXT: s_cmp_gt_i32 s16, 23 -; SI-NEXT: s_cselect_b32 s11, s13, s11 -; SI-NEXT: s_cselect_b32 s10, s12, s10 -; SI-NEXT: s_ashr_i32 s12, s7, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] -; SI-NEXT: s_sub_u32 s7, s10, s12 -; SI-NEXT: s_subb_u32 s8, s11, s13 -; SI-NEXT: s_cmp_lt_i32 s16, 0 -; SI-NEXT: s_cselect_b32 s16, 0, s8 -; SI-NEXT: s_cselect_b32 s7, 0, s7 -; SI-NEXT: s_and_b64 s[10:11], vcc, exec -; SI-NEXT: s_cselect_b32 s17, s7, s2 -; SI-NEXT: s_bfe_u32 s2, s14, 0x80017 -; SI-NEXT: s_and_b32 s7, s14, 0x7fffff -; SI-NEXT: s_sub_i32 s10, 0x96, s2 -; SI-NEXT: s_or_b32 s8, s7, 0x800000 -; SI-NEXT: s_add_i32 s7, s2, 0xffffff6a -; SI-NEXT: s_addk_i32 s2, 0xff81 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], s7 -; SI-NEXT: s_cmp_gt_i32 s2, 23 -; SI-NEXT: s_cselect_b32 s11, s13, s11 -; SI-NEXT: s_cselect_b32 s10, s12, s10 -; SI-NEXT: s_ashr_i32 s12, s14, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13] -; SI-NEXT: s_sub_u32 s7, s10, s12 -; SI-NEXT: s_subb_u32 s8, s11, s13 -; SI-NEXT: s_cmp_lt_i32 s2, 0 -; SI-NEXT: s_cselect_b32 s2, 0, s8 -; SI-NEXT: s_cselect_b32 s12, 0, s7 -; SI-NEXT: s_bfe_u32 s7, s6, 0x80017 -; SI-NEXT: s_and_b32 s8, s6, 0x7fffff -; SI-NEXT: s_sub_i32 s10, 0x96, s7 +; SI-NEXT: v_add_i32_e32 v2, vcc, 0xffffff6a, v3 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v4 +; SI-NEXT: v_sub_i32_e32 v4, vcc, 0x96, v3 +; SI-NEXT: v_add_i32_e32 v10, vcc, 0xffffff81, v3 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v7 +; SI-NEXT: v_add_i32_e32 v12, vcc, 0xffffff6a, v5 +; SI-NEXT: v_sub_i32_e32 v13, vcc, 0x96, v5 +; SI-NEXT: v_add_i32_e32 v14, vcc, 0xffffff81, v5 +; SI-NEXT: v_ashrrev_i32_e32 v15, 31, v9 +; SI-NEXT: s_cmp_gt_i32 s12, 23 +; SI-NEXT: v_mov_b32_e32 v16, s11 +; SI-NEXT: v_mov_b32_e32 v17, s1 +; SI-NEXT: v_mov_b32_e32 v18, s10 +; SI-NEXT: v_mov_b32_e32 v19, s0 +; SI-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 +; SI-NEXT: v_lshr_b64 v[4:5], v[0:1], v4 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v8 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v8, v16, v17, vcc +; SI-NEXT: s_ashr_i32 s8, s3, 31 +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 23, v10 +; SI-NEXT: v_cndmask_b32_e64 v5, v5, v3, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] +; SI-NEXT: v_lshl_b64 v[2:3], v[0:1], v12 +; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], v13 +; SI-NEXT: v_cndmask_b32_e32 v12, v18, v19, vcc +; SI-NEXT: s_ashr_i32 s0, s8, 31 +; SI-NEXT: v_xor_b32_e32 v4, v4, v7 +; SI-NEXT: v_xor_b32_e32 v5, v5, v11 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v14 +; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-NEXT: v_xor_b32_e32 v2, s8, v12 +; SI-NEXT: v_xor_b32_e32 v3, s0, v8 +; SI-NEXT: v_mov_b32_e32 v8, s0 +; SI-NEXT: s_cmp_lt_i32 s12, 0 +; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; SI-NEXT: v_subb_u32_e32 v5, vcc, v5, v11, vcc +; SI-NEXT: v_xor_b32_e32 v0, v0, v9 +; SI-NEXT: v_xor_b32_e32 v1, v1, v15 +; SI-NEXT: v_subrev_i32_e32 v2, vcc, s8, v2 +; SI-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; SI-NEXT: s_cselect_b64 s[10:11], -1, 0 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v10 +; SI-NEXT: 
v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: s_bfe_u32 s12, s2, 0x80017 +; SI-NEXT: s_and_b32 s8, s2, 0x7fffff +; SI-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v9 +; SI-NEXT: v_subb_u32_e64 v1, s[0:1], v1, v15, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[10:11] +; SI-NEXT: s_add_i32 s0, s12, 0xffffff6a ; SI-NEXT: s_bitset1_b32 s8, 23 -; SI-NEXT: s_add_i32 s13, s7, 0xffffff6a -; SI-NEXT: s_add_i32 s14, s7, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 -; SI-NEXT: s_lshl_b64 s[8:9], s[8:9], s13 -; SI-NEXT: s_cmp_gt_i32 s14, 23 -; SI-NEXT: s_cselect_b32 s9, s9, s11 -; SI-NEXT: s_cselect_b32 s8, s8, s10 -; SI-NEXT: s_ashr_i32 s6, s6, 31 -; SI-NEXT: s_ashr_i32 s7, s6, 31 -; SI-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] -; SI-NEXT: s_sub_u32 s6, s8, s6 -; SI-NEXT: s_subb_u32 s7, s9, s7 -; SI-NEXT: s_cmp_lt_i32 s14, 0 -; SI-NEXT: s_cselect_b32 s8, 0, s7 -; SI-NEXT: s_cselect_b32 s9, 0, s6 -; SI-NEXT: s_and_b64 s[6:7], s[0:1], exec -; SI-NEXT: s_cselect_b32 s9, s9, s12 -; SI-NEXT: s_xor_b32 s10, s15, 0x80000000 -; SI-NEXT: s_and_b64 s[6:7], vcc, exec -; SI-NEXT: s_cselect_b32 s6, s16, s10 -; SI-NEXT: s_xor_b32 s2, s2, 0x80000000 -; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; SI-NEXT: s_cselect_b32 s7, s8, s2 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: v_mov_b32_e32 v2, s17 -; SI-NEXT: v_mov_b32_e32 v0, s9 -; SI-NEXT: v_mov_b32_e32 v3, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_sub_i32 s13, 0x96, s12 +; SI-NEXT: s_addk_i32 s12, 0xff81 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v14 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[10:11] +; SI-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 +; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s3, v6 +; SI-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 +; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s13 +; SI-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; SI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; SI-NEXT: s_cmp_gt_i32 s12, 23 +; SI-NEXT: v_mov_b32_e32 v4, s9 +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: v_mov_b32_e32 v7, s8 +; SI-NEXT: v_mov_b32_e32 v8, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: s_ashr_i32 s0, s2, 31 +; SI-NEXT: v_cndmask_b32_e32 v5, v7, v8, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v5, s0, v5 +; SI-NEXT: v_xor_b32_e32 v4, s1, v4 +; SI-NEXT: v_mov_b32_e32 v7, s1 +; SI-NEXT: s_cmp_lt_i32 s12, 0 +; SI-NEXT: v_subrev_i32_e32 v5, vcc, s0, v5 +; SI-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[0:1] +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s2, v6 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fp_to_uint_v2f32_to_v2i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x5f000000 -; VI-NEXT: s_mov_b32 s7, 0xf000 -; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: v_mov_b32_e32 v8, 0x5f000000 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_sub_f32_e32 v1, s3, v0 -; VI-NEXT: v_readfirstlane_b32 s12, v1 -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: 
s_bfe_u32 s13, s12, 0x80017 -; VI-NEXT: s_and_b32 s0, s12, 0x7fffff -; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_sub_i32 s8, 0x96, s13 -; VI-NEXT: s_bitset1_b32 s0, 23 -; VI-NEXT: s_mov_b32 s1, 0 -; VI-NEXT: s_add_i32 s10, s13, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 -; VI-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 -; VI-NEXT: s_addk_i32 s13, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s13, 23 -; VI-NEXT: s_cselect_b32 s9, s11, s9 -; VI-NEXT: s_cselect_b32 s8, s10, s8 -; VI-NEXT: s_ashr_i32 s10, s12, 31 -; VI-NEXT: s_ashr_i32 s11, s10, 31 -; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] -; VI-NEXT: s_sub_u32 s0, s8, s10 -; VI-NEXT: s_subb_u32 s8, s9, s11 -; VI-NEXT: s_cmp_lt_i32 s13, 0 -; VI-NEXT: s_cselect_b32 s12, 0, s8 -; VI-NEXT: s_cselect_b32 s13, 0, s0 -; VI-NEXT: s_bfe_u32 s14, s3, 0x80017 -; VI-NEXT: s_and_b32 s0, s3, 0x7fffff -; VI-NEXT: s_sub_i32 s8, 0x96, s14 -; VI-NEXT: s_bitset1_b32 s0, 23 -; VI-NEXT: s_add_i32 s10, s14, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 -; VI-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 -; VI-NEXT: s_addk_i32 s14, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s14, 23 -; VI-NEXT: s_cselect_b32 s9, s11, s9 -; VI-NEXT: s_cselect_b32 s8, s10, s8 -; VI-NEXT: s_ashr_i32 s10, s3, 31 -; VI-NEXT: s_ashr_i32 s11, s10, 31 -; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] -; VI-NEXT: s_sub_u32 s0, s8, s10 -; VI-NEXT: s_subb_u32 s8, s9, s11 -; VI-NEXT: s_cmp_lt_i32 s14, 0 -; VI-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-NEXT: v_sub_f32_e32 v1, s2, v0 -; VI-NEXT: s_cselect_b32 s14, 0, s8 -; VI-NEXT: s_cselect_b32 s0, 0, s0 -; VI-NEXT: s_and_b64 s[8:9], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s3, v1 -; VI-NEXT: s_cselect_b32 s13, s0, s13 -; VI-NEXT: s_bfe_u32 s15, s3, 0x80017 -; VI-NEXT: s_and_b32 s0, s3, 0x7fffff -; VI-NEXT: s_sub_i32 s8, 0x96, s15 -; VI-NEXT: s_bitset1_b32 s0, 23 -; VI-NEXT: s_add_i32 s10, s15, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 -; VI-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 -; VI-NEXT: s_addk_i32 s15, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s15, 23 -; VI-NEXT: s_cselect_b32 s9, s11, s9 -; VI-NEXT: s_cselect_b32 s8, s10, s8 -; VI-NEXT: s_ashr_i32 s10, s3, 31 -; VI-NEXT: s_ashr_i32 s11, s10, 31 -; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11] -; VI-NEXT: s_sub_u32 s0, s8, s10 -; VI-NEXT: s_subb_u32 s3, s9, s11 -; VI-NEXT: s_cmp_lt_i32 s15, 0 -; VI-NEXT: s_cselect_b32 s10, 0, s3 -; VI-NEXT: s_cselect_b32 s11, 0, s0 -; VI-NEXT: s_bfe_u32 s3, s2, 0x80017 -; VI-NEXT: s_and_b32 s0, s2, 0x7fffff -; VI-NEXT: s_sub_i32 s8, 0x96, s3 -; VI-NEXT: s_bitset1_b32 s0, 23 -; VI-NEXT: s_add_i32 s15, s3, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 -; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s15 -; VI-NEXT: s_addk_i32 s3, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s3, 23 -; VI-NEXT: s_cselect_b32 s1, s1, s9 -; VI-NEXT: s_cselect_b32 s0, s0, s8 -; VI-NEXT: s_ashr_i32 s8, s2, 31 -; VI-NEXT: s_ashr_i32 s9, s8, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9] -; VI-NEXT: s_sub_u32 s0, s0, s8 -; VI-NEXT: s_subb_u32 s1, s1, s9 -; VI-NEXT: s_cmp_lt_i32 s3, 0 -; VI-NEXT: s_cselect_b32 s8, 0, s1 -; VI-NEXT: s_cselect_b32 s9, 0, s0 -; VI-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0 -; VI-NEXT: s_and_b64 s[2:3], s[0:1], exec -; VI-NEXT: s_cselect_b32 s9, s9, s11 -; VI-NEXT: s_xor_b32 s11, s12, 0x80000000 -; VI-NEXT: s_and_b64 s[2:3], vcc, exec -; VI-NEXT: s_cselect_b32 s2, s14, s11 -; VI-NEXT: s_xor_b32 s3, s10, 0x80000000 -; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; VI-NEXT: s_cselect_b32 s0, s8, s3 -; VI-NEXT: v_mov_b32_e32 v0, s9 -; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: v_mov_b32_e32 v2, s13 -; 
VI-NEXT: v_mov_b32_e32 v3, s2 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: s_bfe_u32 s12, s7, 0x80017 +; VI-NEXT: s_and_b32 s1, s7, 0x7fffff +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_add_i32 s0, s12, 0xffffff6a +; VI-NEXT: s_or_b32 s4, s1, 0x800000 +; VI-NEXT: s_mov_b32 s5, 0 +; VI-NEXT: s_sub_i32 s2, 0x96, s12 +; VI-NEXT: s_lshl_b64 s[0:1], s[4:5], s0 +; VI-NEXT: s_lshr_b64 s[2:3], s[4:5], s2 +; VI-NEXT: s_addk_i32 s12, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s12, 23 +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s2 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s0, s7, 31 +; VI-NEXT: s_ashr_i32 s1, s0, 31 +; VI-NEXT: v_xor_b32_e32 v1, s0, v1 +; VI-NEXT: v_xor_b32_e32 v0, s1, v0 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, s0, v1 +; VI-NEXT: v_sub_f32_e32 v9, s7, v8 +; VI-NEXT: v_subb_u32_e32 v6, vcc, v0, v2, vcc +; VI-NEXT: s_cmp_lt_i32 s12, 0 +; VI-NEXT: v_bfe_u32 v10, v9, 23, 8 +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v9 +; VI-NEXT: v_mov_b32_e32 v1, 0 +; VI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; VI-NEXT: v_add_u32_e32 v2, vcc, 0xffffff6a, v10 +; VI-NEXT: v_or_b32_e32 v0, 0x800000, v0 +; VI-NEXT: v_sub_u32_e32 v4, vcc, 0x96, v10 +; VI-NEXT: v_cndmask_b32_e64 v7, v3, 0, s[12:13] +; VI-NEXT: v_lshlrev_b64 v[2:3], v2, v[0:1] +; VI-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1] +; VI-NEXT: v_add_u32_e32 v0, vcc, 0xffffff81, v10 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v0 +; VI-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; VI-NEXT: v_ashrrev_i32_e32 v4, 31, v9 +; VI-NEXT: v_cmp_lt_f32_e64 s[2:3], s7, v8 +; VI-NEXT: s_bfe_u32 s7, s6, 0x80017 +; VI-NEXT: s_and_b32 s4, s6, 0x7fffff +; VI-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NEXT: v_xor_b32_e32 v2, v2, v4 +; VI-NEXT: s_add_i32 s14, s7, 0xffffff6a +; VI-NEXT: s_bitset1_b32 s4, 23 +; VI-NEXT: s_sub_i32 s16, 0x96, s7 +; VI-NEXT: v_xor_b32_e32 v3, v3, v5 +; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v4 +; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v0 +; VI-NEXT: s_lshl_b64 s[14:15], s[4:5], s14 +; VI-NEXT: s_lshr_b64 s[4:5], s[4:5], s16 +; VI-NEXT: s_addk_i32 s7, 0xff81 +; VI-NEXT: v_subb_u32_e32 v5, vcc, v3, v5, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[0:1] +; VI-NEXT: s_cmp_gt_i32 s7, 23 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, v7, s[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s5 +; VI-NEXT: v_mov_b32_e32 v3, s15 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-NEXT: v_mov_b32_e32 v3, s4 +; VI-NEXT: v_mov_b32_e32 v4, s14 +; VI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; VI-NEXT: s_ashr_i32 s4, s6, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v3, s4, v3 +; VI-NEXT: v_xor_b32_e32 v0, s5, v0 +; VI-NEXT: v_mov_b32_e32 v4, s5 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 +; VI-NEXT: s_cmp_lt_i32 s7, 0 +; VI-NEXT: v_sub_f32_e32 v10, s6, v8 +; VI-NEXT: v_subb_u32_e32 v7, vcc, v0, v4, vcc +; VI-NEXT: s_cselect_b64 s[14:15], -1, 0 +; VI-NEXT: v_bfe_u32 v11, v10, 23, 8 +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v10 +; VI-NEXT: v_cndmask_b32_e64 v9, v3, 0, s[14:15] +; VI-NEXT: v_add_u32_e32 v3, vcc, 0xffffff6a, v11 +; VI-NEXT: v_or_b32_e32 v0, 0x800000, v0 +; VI-NEXT: v_sub_u32_e32 v12, vcc, 0x96, v11 +; VI-NEXT: v_lshlrev_b64 v[3:4], v3, v[0:1] +; VI-NEXT: v_lshrrev_b64 v[0:1], v12, v[0:1] +; VI-NEXT: v_add_u32_e32 v11, vcc, 
0xffffff81, v11 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v11 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v10 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; VI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VI-NEXT: v_xor_b32_e32 v0, v0, v3 +; VI-NEXT: v_xor_b32_e32 v1, v1, v4 +; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 +; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v11 +; VI-NEXT: v_cndmask_b32_e64 v4, v5, 0, s[0:1] +; VI-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[12:13] +; VI-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 +; VI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_lt_f32_e64 s[4:5], s6, v8 +; VI-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[2:3] +; VI-NEXT: v_cndmask_b32_e64 v4, v7, 0, s[14:15] +; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; VI-NEXT: s_endpgm ; ; EG-LABEL: fp_to_uint_v2f32_to_v2i64: @@ -633,413 +661,446 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %ou define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { ; SI-LABEL: fp_to_uint_v4f32_to_v4i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s11, 0xf000 -; SI-NEXT: v_mov_b32_e32 v0, 0x5f000000 -; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s3, 0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, 0x5f000000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_sub_f32_e32 v1, s13, v0 -; SI-NEXT: v_cmp_lt_f32_e32 vcc, s13, v0 -; SI-NEXT: v_sub_f32_e32 v2, s12, v0 -; SI-NEXT: v_cmp_lt_f32_e64 s[0:1], s12, v0 -; SI-NEXT: v_sub_f32_e32 v3, s15, v0 -; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], s15, v0 -; SI-NEXT: v_sub_f32_e32 v4, s14, v0 -; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], s14, v0 -; SI-NEXT: v_readfirstlane_b32 s10, v1 -; SI-NEXT: v_readfirstlane_b32 s20, v2 -; SI-NEXT: v_readfirstlane_b32 s21, v3 -; SI-NEXT: v_readfirstlane_b32 s22, v4 -; SI-NEXT: s_bfe_u32 s16, s10, 0x80017 -; SI-NEXT: s_and_b32 s6, s10, 0x7fffff -; SI-NEXT: s_sub_i32 s17, 0x96, s16 -; SI-NEXT: s_bitset1_b32 s6, 23 -; SI-NEXT: s_add_i32 s18, s16, 0xffffff6a -; SI-NEXT: s_add_i32 s23, s16, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[16:17], s[6:7], s17 -; SI-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; SI-NEXT: s_cmp_gt_i32 s23, 23 -; SI-NEXT: s_cselect_b32 s17, s19, s17 -; SI-NEXT: s_cselect_b32 s16, s18, s16 -; SI-NEXT: s_ashr_i32 s18, s10, 31 -; SI-NEXT: s_ashr_i32 s19, s18, 31 -; SI-NEXT: s_xor_b64 s[16:17], s[16:17], s[18:19] -; SI-NEXT: s_sub_u32 s6, s16, s18 -; SI-NEXT: s_subb_u32 s10, s17, s19 -; SI-NEXT: s_cmp_lt_i32 s23, 0 -; SI-NEXT: s_cselect_b32 s10, 0, s10 -; SI-NEXT: s_cselect_b32 s23, 0, s6 -; SI-NEXT: s_bfe_u32 s16, s13, 0x80017 -; SI-NEXT: s_and_b32 s6, s13, 0x7fffff -; SI-NEXT: s_sub_i32 s17, 0x96, s16 -; SI-NEXT: s_bitset1_b32 s6, 23 -; SI-NEXT: s_add_i32 s18, s16, 0xffffff6a -; SI-NEXT: s_add_i32 s24, s16, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[16:17], s[6:7], s17 -; SI-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; SI-NEXT: s_cmp_gt_i32 s24, 23 -; SI-NEXT: s_cselect_b32 s17, s19, s17 -; SI-NEXT: s_cselect_b32 s16, s18, s16 -; SI-NEXT: s_ashr_i32 s18, s13, 31 -; SI-NEXT: s_ashr_i32 
s19, s18, 31 -; SI-NEXT: s_xor_b64 s[16:17], s[16:17], s[18:19] -; SI-NEXT: s_sub_u32 s6, s16, s18 -; SI-NEXT: s_subb_u32 s13, s17, s19 -; SI-NEXT: s_cmp_lt_i32 s24, 0 -; SI-NEXT: s_cselect_b32 s24, 0, s13 -; SI-NEXT: s_cselect_b32 s6, 0, s6 -; SI-NEXT: s_and_b64 s[16:17], vcc, exec -; SI-NEXT: s_cselect_b32 s23, s6, s23 -; SI-NEXT: s_bfe_u32 s13, s20, 0x80017 -; SI-NEXT: s_and_b32 s6, s20, 0x7fffff -; SI-NEXT: s_sub_i32 s16, 0x96, s13 -; SI-NEXT: s_bitset1_b32 s6, 23 -; SI-NEXT: s_add_i32 s18, s13, 0xffffff6a -; SI-NEXT: s_addk_i32 s13, 0xff81 -; SI-NEXT: s_lshr_b64 s[16:17], s[6:7], s16 -; SI-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 +; SI-NEXT: s_bfe_u32 s0, s9, 0x80017 +; SI-NEXT: s_and_b32 s1, s9, 0x7fffff +; SI-NEXT: v_sub_f32_e32 v0, s9, v2 +; SI-NEXT: v_sub_f32_e32 v3, s8, v2 +; SI-NEXT: v_sub_f32_e32 v4, s11, v2 +; SI-NEXT: v_sub_f32_e32 v7, s10, v2 +; SI-NEXT: s_add_i32 s12, s0, 0xffffff6a +; SI-NEXT: s_or_b32 s2, s1, 0x800000 +; SI-NEXT: s_sub_i32 s13, 0x96, s0 +; SI-NEXT: s_add_i32 s14, s0, 0xffffff81 +; SI-NEXT: v_bfe_u32 v5, v0, 23, 8 +; SI-NEXT: v_and_b32_e32 v6, 0x7fffff, v0 +; SI-NEXT: v_ashrrev_i32_e32 v8, 31, v0 +; SI-NEXT: v_bfe_u32 v9, v3, 23, 8 +; SI-NEXT: v_and_b32_e32 v10, 0x7fffff, v3 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v3 +; SI-NEXT: v_bfe_u32 v12, v4, 23, 8 +; SI-NEXT: v_and_b32_e32 v13, 0x7fffff, v4 +; SI-NEXT: v_ashrrev_i32_e32 v14, 31, v4 +; SI-NEXT: v_bfe_u32 v15, v7, 23, 8 +; SI-NEXT: v_add_i32_e32 v3, vcc, 0xffffff6a, v5 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v6 +; SI-NEXT: v_sub_i32_e32 v6, vcc, 0x96, v5 +; SI-NEXT: v_add_i32_e32 v16, vcc, 0xffffff81, v5 +; SI-NEXT: v_add_i32_e32 v17, vcc, 0xffffff6a, v9 +; SI-NEXT: v_sub_i32_e32 v18, vcc, 0x96, v9 +; SI-NEXT: v_add_i32_e32 v9, vcc, 0xffffff81, v9 +; SI-NEXT: v_lshl_b64 v[3:4], v[0:1], v3 +; SI-NEXT: v_lshr_b64 v[5:6], v[0:1], v6 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v16 +; SI-NEXT: v_cndmask_b32_e32 v19, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e32 v20, v5, v3, vcc +; SI-NEXT: v_add_i32_e32 v21, vcc, 0xffffff6a, v12 +; SI-NEXT: v_sub_i32_e32 v22, vcc, 0x96, v12 +; SI-NEXT: v_add_i32_e32 v12, vcc, 0xffffff81, v12 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v10 +; SI-NEXT: v_lshl_b64 v[3:4], v[0:1], v17 +; SI-NEXT: v_lshr_b64 v[5:6], v[0:1], v18 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v13 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v9 +; SI-NEXT: v_cndmask_b32_e32 v10, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e32 v13, v5, v3, vcc +; SI-NEXT: v_lshl_b64 v[3:4], v[0:1], v21 +; SI-NEXT: v_lshr_b64 v[5:6], v[0:1], v22 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v12 +; SI-NEXT: v_cndmask_b32_e32 v6, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc +; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v7 +; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v7 +; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s12 +; SI-NEXT: s_lshr_b64 s[12:13], s[2:3], s13 +; SI-NEXT: v_add_i32_e32 v3, vcc, 0xffffff6a, v15 +; SI-NEXT: v_sub_i32_e32 v17, vcc, 0x96, v15 +; SI-NEXT: v_add_i32_e32 v15, vcc, 0xffffff81, v15 +; SI-NEXT: s_cmp_gt_i32 s14, 23 +; SI-NEXT: v_or_b32_e32 v0, 0x800000, v0 +; SI-NEXT: v_lshl_b64 v[3:4], v[0:1], v3 +; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], v17 +; SI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v15 +; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; SI-NEXT: v_mov_b32_e32 v3, s13 +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: v_mov_b32_e32 v17, s12 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: v_cndmask_b32_e32 v4, v17, v4, vcc +; SI-NEXT: 
v_ashrrev_i32_e32 v17, 31, v8 +; SI-NEXT: v_xor_b32_e32 v18, v20, v8 +; SI-NEXT: v_xor_b32_e32 v19, v19, v17 +; SI-NEXT: v_sub_i32_e32 v8, vcc, v18, v8 +; SI-NEXT: v_ashrrev_i32_e32 v18, 31, v11 +; SI-NEXT: s_ashr_i32 s0, s9, 31 +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v4, s0, v4 +; SI-NEXT: v_xor_b32_e32 v3, s1, v3 +; SI-NEXT: v_subb_u32_e32 v17, vcc, v19, v17, vcc +; SI-NEXT: v_mov_b32_e32 v19, s1 +; SI-NEXT: v_subrev_i32_e32 v4, vcc, s0, v4 +; SI-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v14 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v16 +; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v7 +; SI-NEXT: s_cmp_lt_i32 s14, 0 +; SI-NEXT: v_xor_b32_e32 v13, v13, v11 +; SI-NEXT: v_xor_b32_e32 v10, v10, v18 +; SI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc +; SI-NEXT: s_bfe_u32 s14, s8, 0x80017 +; SI-NEXT: s_and_b32 s2, s8, 0x7fffff +; SI-NEXT: v_sub_i32_e64 v11, s[0:1], v13, v11 +; SI-NEXT: v_subb_u32_e64 v10, s[0:1], v10, v18, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v13, v17, 0, vcc +; SI-NEXT: v_xor_b32_e32 v5, v5, v14 +; SI-NEXT: v_xor_b32_e32 v6, v6, v19 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[12:13] +; SI-NEXT: s_add_i32 s0, s14, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s2, 23 +; SI-NEXT: s_sub_i32 s1, 0x96, s14 +; SI-NEXT: s_add_i32 s16, s14, 0xffffff81 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v9 +; SI-NEXT: v_cndmask_b32_e64 v9, v11, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[12:13] +; SI-NEXT: v_xor_b32_e32 v11, 0x80000000, v13 +; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc +; SI-NEXT: v_sub_i32_e32 v13, vcc, v5, v14 +; SI-NEXT: v_subb_u32_e32 v14, vcc, v6, v19, vcc +; SI-NEXT: v_xor_b32_e32 v0, v0, v7 +; SI-NEXT: v_xor_b32_e32 v1, v1, v16 +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s9, v2 +; SI-NEXT: v_cndmask_b32_e32 v5, v8, v4, vcc +; SI-NEXT: s_lshl_b64 s[12:13], s[2:3], s0 +; SI-NEXT: s_lshr_b64 s[14:15], s[2:3], s1 +; SI-NEXT: v_cndmask_b32_e32 v6, v11, v3, vcc +; SI-NEXT: v_xor_b32_e32 v4, 0x80000000, v10 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v12 +; SI-NEXT: v_cndmask_b32_e64 v8, v13, 0, vcc +; SI-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v7 +; SI-NEXT: v_subb_u32_e64 v1, s[0:1], v1, v16, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v3, v14, 0, vcc +; SI-NEXT: s_cmp_gt_i32 s16, 23 +; SI-NEXT: v_mov_b32_e32 v7, s15 +; SI-NEXT: v_mov_b32_e32 v10, s13 +; SI-NEXT: v_mov_b32_e32 v11, s14 +; SI-NEXT: v_mov_b32_e32 v12, s12 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v15 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_xor_b32_e32 v13, 0x80000000, v3 +; SI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc +; SI-NEXT: s_ashr_i32 s0, s8, 31 +; SI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; SI-NEXT: v_cndmask_b32_e32 v7, v11, v12, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v7, s0, v7 +; SI-NEXT: v_xor_b32_e32 v3, s1, v3 +; SI-NEXT: v_mov_b32_e32 v10, s1 +; SI-NEXT: s_cmp_lt_i32 s16, 0 +; SI-NEXT: v_subrev_i32_e32 v7, vcc, s0, v7 +; SI-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_bfe_u32 s9, s11, 0x80017 +; SI-NEXT: s_and_b32 s2, s11, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v7, v7, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v10, v3, 0, s[0:1] +; SI-NEXT: s_add_i32 s0, s9, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s2, 23 +; SI-NEXT: s_sub_i32 s12, 0x96, s9 +; SI-NEXT: s_add_i32 s13, s9, 0xffffff81 +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 +; SI-NEXT: v_cndmask_b32_e32 v3, v9, v7, vcc +; SI-NEXT: 
v_cndmask_b32_e32 v4, v4, v10, vcc +; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s0 +; SI-NEXT: s_lshr_b64 s[8:9], s[2:3], s12 ; SI-NEXT: s_cmp_gt_i32 s13, 23 -; SI-NEXT: s_cselect_b32 s17, s19, s17 -; SI-NEXT: s_cselect_b32 s16, s18, s16 -; SI-NEXT: s_ashr_i32 s18, s20, 31 -; SI-NEXT: s_ashr_i32 s19, s18, 31 -; SI-NEXT: s_xor_b64 s[16:17], s[16:17], s[18:19] -; SI-NEXT: s_sub_u32 s6, s16, s18 -; SI-NEXT: s_subb_u32 s16, s17, s19 +; SI-NEXT: v_mov_b32_e32 v7, s9 +; SI-NEXT: v_mov_b32_e32 v9, s1 +; SI-NEXT: v_mov_b32_e32 v10, s8 +; SI-NEXT: v_mov_b32_e32 v11, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: s_ashr_i32 s0, s11, 31 +; SI-NEXT: v_cndmask_b32_e32 v9, v10, v11, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v9, s0, v9 +; SI-NEXT: v_xor_b32_e32 v7, s1, v7 +; SI-NEXT: v_mov_b32_e32 v10, s1 ; SI-NEXT: s_cmp_lt_i32 s13, 0 -; SI-NEXT: s_cselect_b32 s20, 0, s16 -; SI-NEXT: s_cselect_b32 s25, 0, s6 -; SI-NEXT: s_bfe_u32 s13, s12, 0x80017 -; SI-NEXT: s_and_b32 s6, s12, 0x7fffff -; SI-NEXT: s_sub_i32 s16, 0x96, s13 -; SI-NEXT: s_bitset1_b32 s6, 23 -; SI-NEXT: s_add_i32 s18, s13, 0xffffff6a -; SI-NEXT: s_add_i32 s26, s13, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[16:17], s[6:7], s16 -; SI-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; SI-NEXT: s_cmp_gt_i32 s26, 23 -; SI-NEXT: s_cselect_b32 s17, s19, s17 -; SI-NEXT: s_cselect_b32 s16, s18, s16 -; SI-NEXT: s_ashr_i32 s12, s12, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[16:17], s[16:17], s[12:13] -; SI-NEXT: s_sub_u32 s6, s16, s12 -; SI-NEXT: s_subb_u32 s12, s17, s13 -; SI-NEXT: s_cmp_lt_i32 s26, 0 -; SI-NEXT: s_cselect_b32 s16, 0, s12 -; SI-NEXT: s_cselect_b32 s6, 0, s6 -; SI-NEXT: s_and_b64 s[12:13], s[0:1], exec -; SI-NEXT: s_cselect_b32 s17, s6, s25 -; SI-NEXT: s_xor_b32 s6, s10, 0x80000000 -; SI-NEXT: s_and_b64 s[12:13], vcc, exec -; SI-NEXT: s_cselect_b32 s18, s24, s6 -; SI-NEXT: s_xor_b32 s6, s20, 0x80000000 -; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; SI-NEXT: s_cselect_b32 s16, s16, s6 -; SI-NEXT: s_bfe_u32 s0, s21, 0x80017 -; SI-NEXT: s_and_b32 s1, s21, 0x7fffff -; SI-NEXT: s_sub_i32 s10, 0x96, s0 -; SI-NEXT: s_or_b32 s6, s1, 0x800000 -; SI-NEXT: s_add_i32 s12, s0, 0xffffff6a -; SI-NEXT: s_add_i32 s19, s0, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[0:1], s[6:7], s10 -; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 -; SI-NEXT: s_cmp_gt_i32 s19, 23 -; SI-NEXT: s_cselect_b32 s1, s13, s1 -; SI-NEXT: s_cselect_b32 s0, s12, s0 -; SI-NEXT: s_ashr_i32 s12, s21, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; SI-NEXT: s_sub_u32 s0, s0, s12 -; SI-NEXT: s_subb_u32 s1, s1, s13 -; SI-NEXT: s_cmp_lt_i32 s19, 0 -; SI-NEXT: s_cselect_b32 s10, 0, s1 -; SI-NEXT: s_cselect_b32 s19, 0, s0 -; SI-NEXT: s_bfe_u32 s0, s15, 0x80017 -; SI-NEXT: s_and_b32 s1, s15, 0x7fffff -; SI-NEXT: s_sub_i32 s12, 0x96, s0 -; SI-NEXT: s_or_b32 s6, s1, 0x800000 -; SI-NEXT: s_add_i32 s13, s0, 0xffffff6a -; SI-NEXT: s_add_i32 s20, s0, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 -; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 -; SI-NEXT: s_cmp_gt_i32 s20, 23 -; SI-NEXT: s_cselect_b32 s1, s13, s1 -; SI-NEXT: s_cselect_b32 s0, s12, s0 -; SI-NEXT: s_ashr_i32 s12, s15, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; SI-NEXT: s_sub_u32 s0, s0, s12 -; SI-NEXT: s_subb_u32 s1, s1, s13 -; SI-NEXT: s_cmp_lt_i32 s20, 0 -; SI-NEXT: s_cselect_b32 s15, 0, s1 -; SI-NEXT: s_cselect_b32 s6, 0, s0 -; SI-NEXT: s_and_b64 s[0:1], s[2:3], exec -; 
SI-NEXT: s_cselect_b32 s19, s6, s19 -; SI-NEXT: s_bfe_u32 s0, s22, 0x80017 -; SI-NEXT: s_and_b32 s1, s22, 0x7fffff -; SI-NEXT: s_sub_i32 s12, 0x96, s0 -; SI-NEXT: s_or_b32 s6, s1, 0x800000 -; SI-NEXT: s_add_i32 s13, s0, 0xffffff6a -; SI-NEXT: s_add_i32 s20, s0, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 -; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 -; SI-NEXT: s_cmp_gt_i32 s20, 23 -; SI-NEXT: s_cselect_b32 s1, s13, s1 -; SI-NEXT: s_cselect_b32 s0, s12, s0 -; SI-NEXT: s_ashr_i32 s12, s22, 31 -; SI-NEXT: s_ashr_i32 s13, s12, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; SI-NEXT: s_sub_u32 s0, s0, s12 -; SI-NEXT: s_subb_u32 s1, s1, s13 -; SI-NEXT: s_cmp_lt_i32 s20, 0 -; SI-NEXT: s_cselect_b32 s12, 0, s1 -; SI-NEXT: s_cselect_b32 s13, 0, s0 -; SI-NEXT: s_bfe_u32 s0, s14, 0x80017 -; SI-NEXT: s_and_b32 s1, s14, 0x7fffff -; SI-NEXT: s_sub_i32 s20, 0x96, s0 -; SI-NEXT: s_or_b32 s6, s1, 0x800000 -; SI-NEXT: s_add_i32 s21, s0, 0xffffff6a -; SI-NEXT: s_add_i32 s22, s0, 0xffffff81 -; SI-NEXT: s_lshr_b64 s[0:1], s[6:7], s20 -; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], s21 -; SI-NEXT: s_cmp_gt_i32 s22, 23 -; SI-NEXT: s_cselect_b32 s1, s7, s1 -; SI-NEXT: s_cselect_b32 s0, s6, s0 -; SI-NEXT: s_ashr_i32 s6, s14, 31 -; SI-NEXT: s_ashr_i32 s7, s6, 31 -; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[6:7] -; SI-NEXT: s_sub_u32 s0, s0, s6 -; SI-NEXT: s_subb_u32 s1, s1, s7 -; SI-NEXT: s_cmp_lt_i32 s22, 0 -; SI-NEXT: s_cselect_b32 s6, 0, s1 -; SI-NEXT: s_cselect_b32 s7, 0, s0 -; SI-NEXT: s_and_b64 s[0:1], s[4:5], exec -; SI-NEXT: s_cselect_b32 s7, s7, s13 -; SI-NEXT: s_xor_b32 s10, s10, 0x80000000 -; SI-NEXT: s_and_b64 s[0:1], s[2:3], exec -; SI-NEXT: s_cselect_b32 s2, s15, s10 -; SI-NEXT: s_xor_b32 s3, s12, 0x80000000 -; SI-NEXT: s_and_b64 s[0:1], s[4:5], exec -; SI-NEXT: s_cselect_b32 s0, s6, s3 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: v_mov_b32_e32 v2, s23 -; SI-NEXT: v_mov_b32_e32 v0, s17 -; SI-NEXT: v_mov_b32_e32 v3, s18 -; SI-NEXT: v_mov_b32_e32 v1, s16 -; SI-NEXT: v_mov_b32_e32 v6, s19 -; SI-NEXT: v_mov_b32_e32 v4, s7 -; SI-NEXT: v_mov_b32_e32 v7, s2 -; SI-NEXT: v_mov_b32_e32 v5, s0 -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NEXT: v_subrev_i32_e32 v9, vcc, s0, v9 +; SI-NEXT: v_subb_u32_e32 v7, vcc, v7, v10, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_bfe_u32 s8, s10, 0x80017 +; SI-NEXT: s_and_b32 s2, s10, 0x7fffff +; SI-NEXT: v_cndmask_b32_e64 v9, v9, 0, s[0:1] +; SI-NEXT: s_add_i32 s9, s8, 0xffffff6a +; SI-NEXT: s_bitset1_b32 s2, 23 +; SI-NEXT: s_sub_i32 s12, 0x96, s8 +; SI-NEXT: s_addk_i32 s8, 0xff81 +; SI-NEXT: v_cndmask_b32_e64 v7, v7, 0, s[0:1] +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s11, v2 +; SI-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc +; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], s9 +; SI-NEXT: s_lshr_b64 s[2:3], s[2:3], s12 +; SI-NEXT: v_cndmask_b32_e32 v10, v13, v7, vcc +; SI-NEXT: s_cmp_gt_i32 s8, 23 +; SI-NEXT: v_mov_b32_e32 v7, s3 +; SI-NEXT: v_mov_b32_e32 v8, s1 +; SI-NEXT: v_mov_b32_e32 v11, s2 +; SI-NEXT: v_mov_b32_e32 v12, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc +; SI-NEXT: s_ashr_i32 s0, s10, 31 +; SI-NEXT: v_cndmask_b32_e32 v8, v11, v12, vcc +; SI-NEXT: s_ashr_i32 s1, s0, 31 +; SI-NEXT: v_xor_b32_e32 v8, s0, v8 +; SI-NEXT: v_xor_b32_e32 v7, s1, v7 +; SI-NEXT: v_mov_b32_e32 v11, s1 +; SI-NEXT: s_cmp_lt_i32 s8, 0 +; SI-NEXT: v_subrev_i32_e32 v8, vcc, s0, v8 +; SI-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: 
v_cndmask_b32_e64 v8, v8, 0, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v11, v7, 0, s[0:1] +; SI-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 +; SI-NEXT: v_cndmask_b32_e32 v7, v0, v8, vcc +; SI-NEXT: v_cndmask_b32_e32 v8, v1, v11, vcc +; SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[4:7], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fp_to_uint_v4f32_to_v4i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34 -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x5f000000 -; VI-NEXT: s_mov_b32 s3, 0 -; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s13, 0 +; VI-NEXT: v_mov_b32_e32 v6, 0x5f000000 +; VI-NEXT: v_mov_b32_e32 v5, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_sub_f32_e32 v1, s9, v0 -; VI-NEXT: v_readfirstlane_b32 s14, v1 -; VI-NEXT: s_bfe_u32 s15, s14, 0x80017 -; VI-NEXT: s_and_b32 s1, s14, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s15 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s15, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s15, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s15, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s14, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s15, 0 -; VI-NEXT: s_cselect_b32 s14, 0, s1 -; VI-NEXT: s_cselect_b32 s15, 0, s0 -; VI-NEXT: s_bfe_u32 s16, s9, 0x80017 -; VI-NEXT: s_and_b32 s1, s9, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s16 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s16, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s16, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s16, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s9, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s16, 0 -; VI-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; VI-NEXT: s_cselect_b32 s16, 0, s1 -; VI-NEXT: s_cselect_b32 s2, 0, s0 -; VI-NEXT: s_and_b64 s[0:1], vcc, exec -; VI-NEXT: v_sub_f32_e32 v1, s8, v0 -; VI-NEXT: s_cselect_b32 s9, s2, s15 -; VI-NEXT: v_readfirstlane_b32 s15, v1 -; VI-NEXT: s_bfe_u32 s17, s15, 0x80017 -; VI-NEXT: s_and_b32 s1, s15, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s17 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s17, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s17, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s17, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s15, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s17, 0 -; VI-NEXT: s_cselect_b32 s15, 0, s1 -; VI-NEXT: s_cselect_b32 s17, 0, s0 -; VI-NEXT: s_bfe_u32 s18, s8, 0x80017 -; VI-NEXT: s_and_b32 s1, s8, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s18 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s18, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], 
s[2:3], s12 -; VI-NEXT: s_addk_i32 s18, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s18, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s8, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s18, 0 -; VI-NEXT: s_cselect_b32 s2, 0, s1 -; VI-NEXT: s_cselect_b32 s18, 0, s0 -; VI-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; VI-NEXT: s_and_b64 s[12:13], s[0:1], exec -; VI-NEXT: s_cselect_b32 s8, s18, s17 -; VI-NEXT: s_xor_b32 s14, s14, 0x80000000 -; VI-NEXT: s_and_b64 s[12:13], vcc, exec -; VI-NEXT: s_cselect_b32 s14, s16, s14 -; VI-NEXT: s_xor_b32 s12, s15, 0x80000000 -; VI-NEXT: v_sub_f32_e32 v1, s11, v0 -; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; VI-NEXT: v_readfirstlane_b32 s16, v1 -; VI-NEXT: s_cselect_b32 s15, s2, s12 -; VI-NEXT: s_bfe_u32 s17, s16, 0x80017 -; VI-NEXT: s_and_b32 s1, s16, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s17 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s17, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s17, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s17, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s16, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s17, 0 -; VI-NEXT: s_cselect_b32 s16, 0, s1 -; VI-NEXT: s_cselect_b32 s17, 0, s0 -; VI-NEXT: s_bfe_u32 s18, s11, 0x80017 -; VI-NEXT: s_and_b32 s1, s11, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s18 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s18, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s18, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s18, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s11, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s18, 0 -; VI-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; VI-NEXT: s_cselect_b32 s18, 0, s1 -; VI-NEXT: s_cselect_b32 s2, 0, s0 -; VI-NEXT: s_and_b64 s[0:1], vcc, exec -; VI-NEXT: v_sub_f32_e32 v1, s10, v0 -; VI-NEXT: s_cselect_b32 s11, s2, s17 -; VI-NEXT: v_readfirstlane_b32 s17, v1 -; VI-NEXT: s_bfe_u32 s19, s17, 0x80017 -; VI-NEXT: s_and_b32 s1, s17, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s19 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s12, s19, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[12:13], s[2:3], s12 -; VI-NEXT: s_addk_i32 s19, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s19, 23 -; VI-NEXT: s_cselect_b32 s1, s13, s1 -; VI-NEXT: s_cselect_b32 s0, s12, s0 -; VI-NEXT: s_ashr_i32 s12, s17, 31 -; VI-NEXT: s_ashr_i32 s13, s12, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[12:13] -; VI-NEXT: s_sub_u32 s0, s0, s12 -; VI-NEXT: s_subb_u32 s1, s1, s13 -; VI-NEXT: s_cmp_lt_i32 s19, 0 -; VI-NEXT: s_cselect_b32 s12, 0, s1 -; VI-NEXT: s_cselect_b32 s13, 0, s0 -; VI-NEXT: s_bfe_u32 s17, s10, 0x80017 -; VI-NEXT: s_and_b32 s1, s10, 0x7fffff -; VI-NEXT: s_sub_i32 s0, 0x96, s17 -; VI-NEXT: s_or_b32 s2, s1, 0x800000 -; VI-NEXT: s_add_i32 s19, s17, 0xffffff6a -; VI-NEXT: s_lshr_b64 s[0:1], s[2:3], s0 -; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s19 -; 
VI-NEXT: s_addk_i32 s17, 0xff81 -; VI-NEXT: s_cmp_gt_i32 s17, 23 -; VI-NEXT: s_cselect_b32 s1, s3, s1 -; VI-NEXT: s_cselect_b32 s0, s2, s0 -; VI-NEXT: s_ashr_i32 s2, s10, 31 -; VI-NEXT: s_ashr_i32 s3, s2, 31 -; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; VI-NEXT: s_sub_u32 s0, s0, s2 -; VI-NEXT: s_subb_u32 s1, s1, s3 -; VI-NEXT: s_cmp_lt_i32 s17, 0 -; VI-NEXT: s_cselect_b32 s17, 0, s1 -; VI-NEXT: s_cselect_b32 s19, 0, s0 -; VI-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; VI-NEXT: s_and_b64 s[2:3], s[0:1], exec -; VI-NEXT: s_cselect_b32 s10, s19, s13 -; VI-NEXT: s_xor_b32 s13, s16, 0x80000000 -; VI-NEXT: s_and_b64 s[2:3], vcc, exec -; VI-NEXT: s_cselect_b32 s2, s18, s13 -; VI-NEXT: s_xor_b32 s3, s12, 0x80000000 -; VI-NEXT: s_and_b64 s[0:1], s[0:1], exec -; VI-NEXT: s_cselect_b32 s0, s17, s3 -; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: v_mov_b32_e32 v0, s10 -; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: v_mov_b32_e32 v2, s11 -; VI-NEXT: v_mov_b32_e32 v3, s2 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 -; VI-NEXT: s_nop 0 -; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_mov_b32_e32 v1, s15 -; VI-NEXT: v_mov_b32_e32 v2, s9 -; VI-NEXT: v_mov_b32_e32 v3, s14 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: s_bfe_u32 s14, s5, 0x80017 +; VI-NEXT: s_and_b32 s0, s5, 0x7fffff +; VI-NEXT: s_add_i32 s1, s14, 0xffffff6a +; VI-NEXT: s_or_b32 s12, s0, 0x800000 +; VI-NEXT: s_sub_i32 s2, 0x96, s14 +; VI-NEXT: s_lshl_b64 s[0:1], s[12:13], s1 +; VI-NEXT: s_lshr_b64 s[2:3], s[12:13], s2 +; VI-NEXT: s_addk_i32 s14, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s14, 23 +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s2 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_ashr_i32 s0, s5, 31 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-NEXT: s_ashr_i32 s1, s0, 31 +; VI-NEXT: v_xor_b32_e32 v1, s0, v1 +; VI-NEXT: s_cmp_lt_i32 s14, 0 +; VI-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 +; VI-NEXT: s_cselect_b64 s[14:15], -1, 0 +; VI-NEXT: v_sub_f32_e32 v9, s5, v6 +; VI-NEXT: v_xor_b32_e32 v0, s1, v0 +; VI-NEXT: v_mov_b32_e32 v2, s1 +; VI-NEXT: v_cndmask_b32_e64 v8, v1, 0, s[14:15] +; VI-NEXT: v_bfe_u32 v10, v9, 23, 8 +; VI-NEXT: v_and_b32_e32 v1, 0x7fffff, v9 +; VI-NEXT: v_subb_u32_e32 v7, vcc, v0, v2, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, 0xffffff6a, v10 +; VI-NEXT: v_or_b32_e32 v4, 0x800000, v1 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 0x96, v10 +; VI-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5] +; VI-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] +; VI-NEXT: v_add_u32_e32 v4, vcc, 0xffffff81, v10 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v4 +; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v9 +; VI-NEXT: v_cmp_lt_f32_e64 s[2:3], s5, v6 +; VI-NEXT: s_bfe_u32 s5, s4, 0x80017 +; VI-NEXT: s_and_b32 s12, s4, 0x7fffff +; VI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NEXT: v_xor_b32_e32 v0, v0, v2 +; VI-NEXT: s_add_i32 s16, s5, 0xffffff6a +; VI-NEXT: s_bitset1_b32 s12, 23 +; VI-NEXT: s_sub_i32 s18, 0x96, s5 +; VI-NEXT: v_xor_b32_e32 v1, v1, v3 +; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v4 +; VI-NEXT: s_lshl_b64 s[16:17], s[12:13], s16 +; VI-NEXT: s_lshr_b64 s[18:19], s[12:13], s18 +; VI-NEXT: s_addk_i32 s5, 0xff81 +; VI-NEXT: v_subb_u32_e32 v9, vcc, v1, v3, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] +; VI-NEXT: s_cmp_gt_i32 s5, 23 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, v8, s[2:3] +; 
VI-NEXT: v_mov_b32_e32 v0, s19 +; VI-NEXT: v_mov_b32_e32 v1, s17 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v1, s18 +; VI-NEXT: v_mov_b32_e32 v3, s16 +; VI-NEXT: s_ashr_i32 s12, s4, 31 +; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; VI-NEXT: s_ashr_i32 s16, s12, 31 +; VI-NEXT: v_xor_b32_e32 v1, s12, v1 +; VI-NEXT: s_cmp_lt_i32 s5, 0 +; VI-NEXT: v_xor_b32_e32 v0, s16, v0 +; VI-NEXT: v_mov_b32_e32 v3, s16 +; VI-NEXT: v_subrev_u32_e32 v1, vcc, s12, v1 +; VI-NEXT: s_cselect_b64 s[16:17], -1, 0 +; VI-NEXT: v_sub_f32_e32 v11, s4, v6 +; VI-NEXT: v_cndmask_b32_e64 v10, v1, 0, s[16:17] +; VI-NEXT: v_bfe_u32 v12, v11, 23, 8 +; VI-NEXT: v_and_b32_e32 v1, 0x7fffff, v11 +; VI-NEXT: v_subb_u32_e32 v8, vcc, v0, v3, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, 0xffffff6a, v12 +; VI-NEXT: v_or_b32_e32 v4, 0x800000, v1 +; VI-NEXT: v_sub_u32_e32 v3, vcc, 0x96, v12 +; VI-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5] +; VI-NEXT: v_lshrrev_b64 v[3:4], v3, v[4:5] +; VI-NEXT: v_add_u32_e32 v12, vcc, 0xffffff81, v12 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v12 +; VI-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v11 +; VI-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; VI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VI-NEXT: v_xor_b32_e32 v0, v0, v3 +; VI-NEXT: v_xor_b32_e32 v1, v1, v4 +; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 +; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v12 +; VI-NEXT: v_cndmask_b32_e64 v4, v9, 0, s[0:1] +; VI-NEXT: v_cndmask_b32_e64 v3, v7, 0, s[14:15] +; VI-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 +; VI-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_lt_f32_e64 s[4:5], s4, v6 +; VI-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[2:3] +; VI-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[16:17] +; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v10, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] +; VI-NEXT: s_bfe_u32 s4, s7, 0x80017 +; VI-NEXT: s_and_b32 s1, s7, 0x7fffff +; VI-NEXT: s_add_i32 s0, s4, 0xffffff6a +; VI-NEXT: s_or_b32 s12, s1, 0x800000 +; VI-NEXT: s_sub_i32 s2, 0x96, s4 +; VI-NEXT: s_lshl_b64 s[0:1], s[12:13], s0 +; VI-NEXT: s_lshr_b64 s[2:3], s[12:13], s2 +; VI-NEXT: s_addk_i32 s4, 0xff81 +; VI-NEXT: s_cmp_gt_i32 s4, 23 +; VI-NEXT: v_mov_b32_e32 v4, s3 +; VI-NEXT: v_mov_b32_e32 v7, s1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; VI-NEXT: v_mov_b32_e32 v7, s2 +; VI-NEXT: v_mov_b32_e32 v8, s0 +; VI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc +; VI-NEXT: s_ashr_i32 s0, s7, 31 +; VI-NEXT: s_ashr_i32 s1, s0, 31 +; VI-NEXT: v_xor_b32_e32 v7, s0, v7 +; VI-NEXT: v_xor_b32_e32 v4, s1, v4 +; VI-NEXT: v_mov_b32_e32 v8, s1 +; VI-NEXT: v_subrev_u32_e32 v7, vcc, s0, v7 +; VI-NEXT: s_cmp_lt_i32 s4, 0 +; VI-NEXT: v_sub_f32_e32 v13, s7, v6 +; VI-NEXT: v_subb_u32_e32 v11, vcc, v4, v8, vcc +; VI-NEXT: s_cselect_b64 s[14:15], -1, 0 +; VI-NEXT: v_bfe_u32 v14, v13, 23, 8 +; VI-NEXT: v_and_b32_e32 v4, 0x7fffff, v13 +; VI-NEXT: v_cndmask_b32_e64 v12, v7, 0, s[14:15] +; VI-NEXT: v_add_u32_e32 v7, vcc, 0xffffff6a, v14 +; VI-NEXT: v_or_b32_e32 v4, 0x800000, v4 +; VI-NEXT: v_sub_u32_e32 v9, vcc, 0x96, v14 +; VI-NEXT: v_lshlrev_b64 v[7:8], v7, v[4:5] +; VI-NEXT: v_lshrrev_b64 v[9:10], v9, v[4:5] +; VI-NEXT: v_add_u32_e32 v4, vcc, 0xffffff81, v14 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v4 +; VI-NEXT: v_cndmask_b32_e32 v7, v9, v7, vcc +; VI-NEXT: v_ashrrev_i32_e32 v9, 31, v13 +; VI-NEXT: 
v_cmp_lt_f32_e64 s[2:3], s7, v6 +; VI-NEXT: s_bfe_u32 s7, s6, 0x80017 +; VI-NEXT: s_and_b32 s5, s6, 0x7fffff +; VI-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc +; VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-NEXT: v_xor_b32_e32 v7, v7, v9 +; VI-NEXT: s_add_i32 s4, s7, 0xffffff6a +; VI-NEXT: s_or_b32 s12, s5, 0x800000 +; VI-NEXT: s_sub_i32 s16, 0x96, s7 +; VI-NEXT: v_xor_b32_e32 v8, v8, v10 +; VI-NEXT: v_sub_u32_e32 v7, vcc, v7, v9 +; VI-NEXT: v_cmp_gt_i32_e64 s[0:1], 0, v4 +; VI-NEXT: s_lshl_b64 s[4:5], s[12:13], s4 +; VI-NEXT: s_lshr_b64 s[12:13], s[12:13], s16 +; VI-NEXT: s_addk_i32 s7, 0xff81 +; VI-NEXT: v_subb_u32_e32 v10, vcc, v8, v10, vcc +; VI-NEXT: v_cndmask_b32_e64 v4, v7, 0, s[0:1] +; VI-NEXT: s_cmp_gt_i32 s7, 23 +; VI-NEXT: v_cndmask_b32_e64 v7, v4, v12, s[2:3] +; VI-NEXT: v_mov_b32_e32 v4, s13 +; VI-NEXT: v_mov_b32_e32 v8, s5 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; VI-NEXT: v_mov_b32_e32 v8, s12 +; VI-NEXT: v_mov_b32_e32 v9, s4 +; VI-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc +; VI-NEXT: s_ashr_i32 s4, s6, 31 +; VI-NEXT: s_ashr_i32 s5, s4, 31 +; VI-NEXT: v_xor_b32_e32 v8, s4, v8 +; VI-NEXT: v_xor_b32_e32 v4, s5, v4 +; VI-NEXT: v_mov_b32_e32 v9, s5 +; VI-NEXT: v_subrev_u32_e32 v8, vcc, s4, v8 +; VI-NEXT: s_cmp_lt_i32 s7, 0 +; VI-NEXT: v_sub_f32_e32 v14, s6, v6 +; VI-NEXT: v_subb_u32_e32 v12, vcc, v4, v9, vcc +; VI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; VI-NEXT: v_bfe_u32 v15, v14, 23, 8 +; VI-NEXT: v_and_b32_e32 v4, 0x7fffff, v14 +; VI-NEXT: v_cndmask_b32_e64 v13, v8, 0, s[12:13] +; VI-NEXT: v_add_u32_e32 v8, vcc, 0xffffff6a, v15 +; VI-NEXT: v_or_b32_e32 v4, 0x800000, v4 +; VI-NEXT: v_sub_u32_e32 v16, vcc, 0x96, v15 +; VI-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5] +; VI-NEXT: v_lshrrev_b64 v[4:5], v16, v[4:5] +; VI-NEXT: v_add_u32_e32 v15, vcc, 0xffffff81, v15 +; VI-NEXT: v_cmp_lt_i32_e32 vcc, 23, v15 +; VI-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; VI-NEXT: v_ashrrev_i32_e32 v8, 31, v14 +; VI-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; VI-NEXT: v_xor_b32_e32 v4, v4, v8 +; VI-NEXT: v_xor_b32_e32 v5, v5, v9 +; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v8 +; VI-NEXT: v_subb_u32_e32 v9, vcc, v5, v9, vcc +; VI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v15 +; VI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; VI-NEXT: v_cmp_lt_f32_e64 s[4:5], s6, v6 +; VI-NEXT: v_cndmask_b32_e64 v6, v10, 0, s[0:1] +; VI-NEXT: v_cndmask_b32_e64 v5, v4, v13, s[4:5] +; VI-NEXT: v_cndmask_b32_e64 v4, v11, 0, s[14:15] +; VI-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 +; VI-NEXT: v_cndmask_b32_e64 v8, v6, v4, s[2:3] +; VI-NEXT: v_cndmask_b32_e64 v6, v9, 0, vcc +; VI-NEXT: v_cndmask_b32_e64 v4, v12, 0, s[12:13] +; VI-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[4:5] +; VI-NEXT: buffer_store_dwordx4 v[5:8], off, s[8:11], 0 offset:16 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; VI-NEXT: s_endpgm ; ; EG-LABEL: fp_to_uint_v4f32_to_v4i64: diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index f85c208bda4fa..acdb4861fbf3d 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -181,7 +181,7 @@ body: | bb.0: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32 - ; GCN: liveins: $sgpr30_sgpr31, $sgpr10 + ; GCN: liveins: $sgpr10, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index ce13fddded6c0..969434669d34b 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -12,11 +12,12 @@ define void @callee_with_stack_and_call() #0 { ; NO-CFI-SAVES-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; NO-CFI-SAVES-SPILL-TO-VGPR: ; %bb.0: ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s33, 2 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s4, s33 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[8:9], -1 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s4, 2 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 @@ -29,24 +30,26 @@ define void @callee_with_stack_and_call() #0 { ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 2 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00 -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2 -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s4 ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-CFI-SAVES-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] ; ; CFI-SAVES-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; CFI-SAVES-SPILL-TO-VGPR: ; %bb.0: ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s4, s33 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[8:9], -1 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] +; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s4, 4 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, exec_lo, 2 ; 
CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, exec_hi, 3 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s33, 4 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 @@ -59,20 +62,22 @@ define void @callee_with_stack_and_call() #0 { ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 4 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 4 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CFI-SAVES-SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s4 ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; CFI-SAVES-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] ; ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; NO-CFI-SAVES-NO-SPILL-TO-VGPR: ; %bb.0: ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s33 -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s4, s33 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s4 +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 @@ -90,7 +95,7 @@ define void @callee_with_stack_and_call() #0 { ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -99,23 +104,25 @@ define void @callee_with_stack_and_call() #0 { ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 1 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 
4-byte Folded Reload ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0 +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s4, v0 +; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s4 ; NO-CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] ; ; CFI-SAVES-NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; CFI-SAVES-NO-SPILL-TO-VGPR: ; %bb.0: ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s4, s33 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s4 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, exec_lo -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, exec_hi -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s33 -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 @@ -133,7 +140,7 @@ define void @callee_with_stack_and_call() #0 { ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -142,11 +149,12 @@ define void @callee_with_stack_and_call() #0 { ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 1 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:24 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s4, v0 +; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s4 ; CFI-SAVES-NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] ; SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; SPILL-TO-VGPR: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index d8098f6d7ead4..4dd0ff333a013 100644 --- 
a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -1002,27 +1002,22 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11] ; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] -; SI-NEXT: v_readfirstlane_b32 s2, v5 -; SI-NEXT: s_bfe_u32 s0, s2, 0xb0014 -; SI-NEXT: s_add_i32 s3, s0, 0xfffffc01 +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 ; SI-NEXT: s_mov_b32 s1, 0xfffff ; SI-NEXT: s_mov_b32 s0, s6 -; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s3 -; SI-NEXT: v_not_b32_e32 v6, s0 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 ; SI-NEXT: v_and_b32_e32 v6, v4, v6 -; SI-NEXT: v_not_b32_e32 v7, s1 -; SI-NEXT: v_and_b32_e32 v5, v5, v7 -; SI-NEXT: s_and_b32 s0, s2, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s3, 0 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc -; SI-NEXT: v_mov_b32_e32 v7, s0 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: s_cmp_gt_i32 s3, 51 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_mov_b32_e32 v7, s2 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] ; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -1190,21 +1185,21 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ; SI-LABEL: fast_frem_f64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s4, s6 -; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 -; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: s_mov_b32 s4, s8 +; SI-NEXT: s_mov_b32 s5, s9 +; SI-NEXT: s_mov_b32 s8, s10 +; SI-NEXT: s_mov_b32 s9, s11 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -1214,29 +1209,24 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs ; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] ; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1] ; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7] -; SI-NEXT: v_readfirstlane_b32 s6, v5 -; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: s_mov_b32 s5, 0xfffff -; SI-NEXT: 
s_mov_b32 s4, s2 -; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7 -; SI-NEXT: v_not_b32_e32 v6, s4 +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 ; SI-NEXT: v_and_b32_e32 v6, v4, v6 -; SI-NEXT: v_not_b32_e32 v7, s5 -; SI-NEXT: v_and_b32_e32 v5, v5, v7 -; SI-NEXT: s_and_b32 s4, s6, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc -; SI-NEXT: v_mov_b32_e32 v7, s4 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_mov_b32_e32 v7, s6 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] ; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: fast_frem_f64: @@ -1383,21 +1373,21 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ; SI-LABEL: unsafe_frem_f64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_mov_b32 s4, s6 -; SI-NEXT: s_mov_b32 s5, s7 -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 -; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 -; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: s_mov_b32 s4, s8 +; SI-NEXT: s_mov_b32 s5, s9 +; SI-NEXT: s_mov_b32 s8, s10 +; SI-NEXT: s_mov_b32 s9, s11 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -1407,29 +1397,24 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; SI-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] ; SI-NEXT: v_fma_f64 v[8:9], -v[2:3], v[6:7], v[0:1] ; SI-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7] -; SI-NEXT: v_readfirstlane_b32 s6, v5 -; SI-NEXT: s_bfe_u32 s4, s6, 0xb0014 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: s_mov_b32 s5, 0xfffff -; SI-NEXT: s_mov_b32 s4, s2 -; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s7 -; SI-NEXT: v_not_b32_e32 v6, s4 +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 ; SI-NEXT: 
v_and_b32_e32 v6, v4, v6 -; SI-NEXT: v_not_b32_e32 v7, s5 -; SI-NEXT: v_and_b32_e32 v5, v5, v7 -; SI-NEXT: s_and_b32 s4, s6, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc -; SI-NEXT: v_mov_b32_e32 v7, s4 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_mov_b32_e32 v7, s6 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] ; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: unsafe_frem_f64: @@ -3140,26 +3125,21 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15] ; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] -; SI-NEXT: v_readfirstlane_b32 s8, v9 -; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014 -; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01 +; SI-NEXT: v_bfe_u32 v10, v9, 20, 11 +; SI-NEXT: v_add_i32_e32 v12, vcc, 0xfffffc01, v10 ; SI-NEXT: s_mov_b32 s3, 0xfffff -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9 -; SI-NEXT: v_not_b32_e32 v10, s0 +; SI-NEXT: v_lshr_b64 v[10:11], s[2:3], v12 +; SI-NEXT: v_not_b32_e32 v10, v10 ; SI-NEXT: v_and_b32_e32 v10, v8, v10 -; SI-NEXT: v_not_b32_e32 v11, s1 -; SI-NEXT: v_and_b32_e32 v9, v9, v11 -; SI-NEXT: s_and_b32 s0, s8, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s9, 0 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_not_b32_e32 v11, v11 +; SI-NEXT: v_and_b32_e32 v11, v9, v11 +; SI-NEXT: v_and_b32_e32 v13, 0x80000000, v9 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v12 +; SI-NEXT: v_cndmask_b32_e32 v11, v11, v13, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v12 +; SI-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc -; SI-NEXT: v_mov_b32_e32 v11, s0 -; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; SI-NEXT: s_cmp_gt_i32 s9, 51 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_mov_b32_e32 v11, s8 -; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; SI-NEXT: v_cndmask_b32_e32 v8, v10, v8, vcc +; SI-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[0:1] ; SI-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3] ; SI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1] ; SI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] @@ -3176,25 +3156,20 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: s_nop 1 ; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13] ; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1] -; SI-NEXT: v_readfirstlane_b32 s8, v7 -; SI-NEXT: s_bfe_u32 s0, s8, 0xb0014 -; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s9 -; SI-NEXT: v_not_b32_e32 v8, s0 +; SI-NEXT: v_bfe_u32 v8, v7, 20, 11 +; SI-NEXT: v_add_i32_e32 v10, vcc, 0xfffffc01, v8 +; SI-NEXT: v_lshr_b64 v[8:9], s[2:3], v10 +; SI-NEXT: v_not_b32_e32 v8, v8 ; SI-NEXT: v_and_b32_e32 v8, v6, v8 -; SI-NEXT: v_not_b32_e32 v9, s1 -; SI-NEXT: v_and_b32_e32 v7, v7, v9 -; SI-NEXT: s_and_b32 s0, s8, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s9, 
0 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_not_b32_e32 v9, v9 +; SI-NEXT: v_and_b32_e32 v9, v7, v9 +; SI-NEXT: v_and_b32_e32 v11, 0x80000000, v7 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v10 +; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v10 +; SI-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc -; SI-NEXT: v_mov_b32_e32 v9, s0 -; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; SI-NEXT: s_cmp_gt_i32 s9, 51 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_mov_b32_e32 v9, s8 -; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; SI-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; SI-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] ; SI-NEXT: v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll index 1f67442c45a9e..226125335c38d 100644 --- a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -11,7 +11,7 @@ declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone ; FUNC-LABEL: {{^}}v_ftrunc_f64: ; CI: v_trunc_f64 -; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0xb0014 +; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11 ; SI: s_endpgm define amdgpu_kernel void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { %x = load double, double addrspace(1)* %in, align 8 @@ -29,11 +29,11 @@ define amdgpu_kernel void @v_ftrunc_f64(double addrspace(1)* %out, double addrsp ; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP1]] ; SI-DAG: s_andn2_b64 ; SI-DAG: cmp_gt_i32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI-DAG: cmp_lt_i32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI: s_endpgm define amdgpu_kernel void @ftrunc_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.trunc.f64(double %x) nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index a5c57be50d227..a917c4b880cfd 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -8,11 +8,11 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-LABEL: gfx_func: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_mov_b32 s36, s33 +; SDAG-NEXT: s_mov_b32 s33, s32 ; SDAG-NEXT: s_or_saveexec_b64 s[34:35], -1 -; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; SDAG-NEXT: s_mov_b64 exec, s[34:35] -; SDAG-NEXT: v_writelane_b32 v40, s33, 28 -; SDAG-NEXT: s_mov_b32 s33, s32 ; SDAG-NEXT: s_addk_i32 s32, 0x400 ; SDAG-NEXT: v_writelane_b32 v40, s4, 0 ; SDAG-NEXT: v_writelane_b32 v40, s5, 1 @@ -77,22 +77,22 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_readlane_b32 s6, v40, 2 ; SDAG-NEXT: v_readlane_b32 s5, v40, 1 ; SDAG-NEXT: v_readlane_b32 s4, v40, 0 -; SDAG-NEXT: s_addk_i32 s32, 0xfc00 -; SDAG-NEXT: v_readlane_b32 s33, v40, 28 ; SDAG-NEXT: s_or_saveexec_b64 s[34:35], -1 -; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; SDAG-NEXT: s_mov_b64 exec, s[34:35] +; SDAG-NEXT: s_addk_i32 s32, 0xfc00 +; SDAG-NEXT: s_mov_b32 s33, s36 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; 
SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: gfx_func: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s36, s33 +; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[34:35] -; GISEL-NEXT: v_writelane_b32 v40, s33, 28 -; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: v_writelane_b32 v40, s5, 1 @@ -157,11 +157,11 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_readlane_b32 s6, v40, 2 ; GISEL-NEXT: v_readlane_b32 s5, v40, 1 ; GISEL-NEXT: v_readlane_b32 s4, v40, 0 -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 28 ; GISEL-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[34:35] +; GISEL-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL-NEXT: s_mov_b32 s33, s36 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] call void @extern_c_func() diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index efad478801abb..dda68f3e3a492 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -97,11 +97,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_i1_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -113,11 +114,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -125,12 +127,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; 
GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -142,12 +145,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -155,11 +159,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -171,11 +176,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -183,12 +189,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -200,12 +207,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i1(i1 true) @@ -216,11 +224,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i1_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -234,11 +243,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -246,12 +256,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, 
s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -265,12 +276,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -278,11 +290,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -296,11 +309,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -308,12 +322,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -327,12 +342,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i1, i1 addrspace(1)* undef @@ -344,11 +360,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i1_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -362,11 +379,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -374,12 +392,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -393,12 +412,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -406,11 +426,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -424,11 +445,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -436,12 +458,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -455,12 +478,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i1, i1 addrspace(1)* undef @@ -472,11 +496,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -487,11 +512,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -499,12 +525,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -515,12 +542,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; 
GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -528,11 +556,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -544,11 +573,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -556,12 +586,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -572,12 +603,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 
0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i8(i8 123) @@ -588,11 +620,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -604,11 +637,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -616,12 +650,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -633,12 +668,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: 
s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -646,11 +682,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -663,11 +700,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -675,12 +713,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -692,12 +731,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i8, i8 
addrspace(1)* undef @@ -709,11 +749,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -725,11 +766,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -737,12 +779,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -754,12 +797,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -767,11 +811,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; 
GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -784,11 +829,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -796,12 +842,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -813,12 +860,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i8, i8 addrspace(1)* undef @@ -830,11 +878,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -845,11 +894,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -857,12 +907,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -873,12 +924,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -886,11 +938,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 
exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -902,11 +955,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -914,12 +968,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -930,12 +985,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i16(i16 123) @@ -946,11 +1002,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i16_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] 
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -962,11 +1019,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -974,12 +1032,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -991,12 +1050,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1004,11 +1064,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1021,11 +1082,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], 
s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1033,12 +1095,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1050,12 +1113,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i16, i16 addrspace(1)* undef @@ -1067,11 +1131,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i16_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1083,11 +1148,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_swappc_b64 
s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1095,12 +1161,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1112,12 +1179,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1125,11 +1193,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1142,11 +1211,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1154,12 +1224,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1171,12 +1242,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i16, i16 addrspace(1)* undef @@ -1188,11 +1260,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1203,11 +1276,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, 
s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1215,12 +1289,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1231,12 +1306,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1244,11 +1320,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1260,11 +1337,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: 
s_setpc_b64 s[30:31] ; @@ -1272,12 +1350,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1288,12 +1367,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i32(i32 42) @@ -1304,11 +1384,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_i64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1320,11 +1401,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; 
GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1332,12 +1414,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1349,12 +1432,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1362,11 +1446,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1378,11 +1463,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1390,12 +1476,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: 
scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1407,12 +1494,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i64(i64 123) @@ -1423,11 +1511,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1440,11 +1529,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1452,12 +1542,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 
4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1470,12 +1561,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1483,11 +1575,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1500,11 +1593,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1512,12 +1606,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: 
scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1530,12 +1625,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1547,11 +1643,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1565,11 +1662,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1577,12 +1675,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1596,12 +1695,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1609,11 +1709,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1626,11 +1727,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1638,12 +1740,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s31, 1 @@ -1657,12 +1760,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> ) @@ -1673,11 +1777,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1692,11 +1797,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1704,12 +1810,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1724,12 +1831,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1737,11 +1845,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1754,11 +1863,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1766,12 +1876,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1786,12 +1897,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 
2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1805,11 +1917,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1826,11 +1939,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1838,12 +1952,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1860,12 +1975,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; 
GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1873,11 +1989,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1891,11 +2008,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1903,12 +2021,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1925,12 +2044,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1943,11 +2063,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1958,11 +2079,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1970,12 +2092,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1986,12 +2109,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: 
s_setpc_b64 s[30:31] ; @@ -1999,11 +2123,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2015,11 +2140,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2027,12 +2153,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2043,12 +2170,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f16(half 4.0) @@ -2059,11 +2187,12 @@ define 
amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2074,11 +2203,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2086,12 +2216,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2102,12 +2233,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2115,11 +2247,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; 
GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2131,11 +2264,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2143,12 +2277,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2159,12 +2294,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f32(float 4.0) @@ -2175,11 +2311,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 
exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2191,11 +2328,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2203,12 +2341,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2220,12 +2359,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2233,11 +2373,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, 
s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2249,11 +2390,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2261,12 +2403,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2278,12 +2421,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f32(<2 x float> ) @@ -2294,11 +2438,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, 
s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2311,11 +2456,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2323,12 +2469,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2341,12 +2488,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2354,11 +2502,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2371,11 +2520,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; 
GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2383,12 +2533,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2401,12 +2552,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f32(<3 x float> ) @@ -2417,11 +2569,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v5f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2436,11 +2589,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2448,12 +2602,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2468,12 +2623,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2481,11 +2637,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2499,11 +2656,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2511,12 +2669,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2531,12 +2690,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5f32(<5 x float> ) @@ -2547,11 +2707,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2563,11 +2724,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 
-; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2575,12 +2737,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2592,12 +2755,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2605,11 +2769,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2621,11 +2786,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2633,12 +2799,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2650,12 +2817,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f64(double 4.0) @@ -2666,11 +2834,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2684,11 +2853,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2696,12 +2866,13 @@ define 
amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2715,12 +2886,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2728,11 +2900,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2745,11 +2918,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2757,12 +2931,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2776,12 +2951,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f64(<2 x double> ) @@ -2792,11 +2968,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2812,11 +2989,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2824,12 +3002,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2845,12 +3024,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2858,11 +3038,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2876,11 +3057,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2888,12 +3070,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: 
scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -2909,12 +3092,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f64(<3 x double> ) @@ -2925,11 +3109,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -2940,11 +3125,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2952,12 +3138,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -2968,12 +3155,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2981,11 +3169,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -2997,11 +3186,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3009,12 +3199,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ 
-3025,12 +3216,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i16>, <2 x i16> addrspace(1)* undef @@ -3042,11 +3234,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3057,11 +3250,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3069,12 +3263,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3085,12 +3280,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] 
; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3098,11 +3294,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3114,11 +3311,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3126,12 +3324,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3142,12 +3341,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x i16>, <3 x i16> addrspace(1)* undef @@ -3159,11 +3359,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3174,11 +3375,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3186,12 +3388,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3202,12 +3405,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 
s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3215,11 +3419,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3231,11 +3436,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3243,12 +3449,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3259,12 +3466,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 
exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x half>, <3 x half> addrspace(1)* undef @@ -3276,11 +3484,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3292,11 +3501,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3304,12 +3514,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3321,12 +3532,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 
s[30:31] ; @@ -3334,11 +3546,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3350,11 +3563,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3362,12 +3576,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3379,12 +3594,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> ) @@ -3395,11 +3611,12 @@ define 
amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3411,11 +3628,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3423,12 +3641,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3440,12 +3659,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3453,11 +3673,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded 
Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3470,11 +3691,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3482,12 +3704,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3499,12 +3722,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f16(<3 x half> ) @@ -3515,11 +3739,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: 
s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3530,11 +3755,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3542,12 +3768,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3558,12 +3785,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3571,11 +3799,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 
; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3587,11 +3816,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3599,12 +3829,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3615,12 +3846,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i16>, <4 x i16> addrspace(1)* undef @@ -3632,11 +3864,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3648,11 +3881,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3660,12 +3894,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3677,12 +3912,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3690,11 +3926,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3707,11 +3944,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 
s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3719,12 +3957,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3736,12 +3975,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> ) @@ -3752,11 +3992,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3767,11 +4008,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; 
GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3779,12 +4021,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3795,12 +4038,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3808,11 +4052,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3824,11 +4069,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: 
scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3836,12 +4082,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3852,12 +4099,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x half>, <2 x half> addrspace(1)* undef @@ -3869,11 +4117,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -3884,11 +4133,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3896,12 +4146,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3912,12 +4163,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3925,11 +4177,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3941,11 +4194,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -3953,12 +4207,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3969,12 +4224,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i32>, <2 x i32> addrspace(1)* undef @@ -3986,11 +4242,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4002,11 +4259,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4014,12 +4272,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4031,12 +4290,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4044,11 +4304,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4060,11 +4321,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4072,12 +4334,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4089,12 +4352,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> ) @@ -4105,11 +4369,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4122,11 +4387,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4134,12 +4400,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 
+; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4152,12 +4419,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4165,11 +4433,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4182,11 +4451,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4194,12 +4464,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4212,12 +4483,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> ) @@ -4228,11 +4500,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4246,11 +4519,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4258,12 +4532,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: 
v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4277,12 +4552,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4290,11 +4566,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4307,11 +4584,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4319,12 +4597,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4338,12 +4617,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> , i32 6) @@ -4354,11 +4634,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4369,11 +4650,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4381,12 +4663,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4397,12 +4680,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4410,11 +4694,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4426,11 +4711,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4438,12 +4724,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4454,12 +4741,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 
s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i32>, <4 x i32> addrspace(1)* undef @@ -4471,11 +4759,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4489,11 +4778,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4501,12 +4791,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4520,12 +4811,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4533,11 +4825,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4550,11 +4843,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4562,12 +4856,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4581,12 +4876,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> ) @@ -4597,11 +4893,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v5i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4616,11 +4913,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4628,12 +4926,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4648,12 +4947,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt 
vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4661,11 +4961,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4679,11 +4980,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4691,12 +4993,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4711,12 +5014,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5i32(<5 x 
i32> ) @@ -4727,11 +5031,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4746,11 +5051,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4758,12 +5064,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4780,12 +5087,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4793,11 +5101,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 
4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4814,11 +5123,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4826,12 +5136,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4848,12 +5159,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef @@ -4866,11 +5178,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte 
Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -4888,11 +5201,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4900,12 +5214,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4923,12 +5238,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -4936,11 +5252,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 
+; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4955,11 +5272,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -4967,12 +5285,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4990,12 +5309,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> ) @@ -5006,11 +5326,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v16i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; 
GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5027,11 +5348,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5039,12 +5361,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5063,12 +5386,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5076,11 +5400,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5099,11 +5424,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, 
v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5111,12 +5437,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5135,12 +5462,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef @@ -5153,11 +5481,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5179,11 +5508,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 
s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5191,12 +5521,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5219,12 +5550,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5232,11 +5564,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5259,11 +5592,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5271,12 +5605,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5299,12 +5634,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -5317,11 +5653,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5346,11 +5683,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; 
GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5358,12 +5696,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5389,12 +5728,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5402,11 +5742,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5431,11 +5772,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5443,12 +5785,13 @@ 
define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5474,12 +5817,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -5493,11 +5837,12 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX9-LABEL: test_call_external_i32_func_i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -5516,11 +5861,12 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5528,12 +5874,13 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -5553,12 +5900,13 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5566,11 +5914,12 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 @@ -5592,11 +5941,12 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; 
GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5604,12 +5954,13 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill @@ -5629,12 +5980,13 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42) @@ -5646,11 +5998,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5665,11 +6018,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 
2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5677,12 +6031,13 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5699,12 +6054,13 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5712,11 +6068,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5733,11 +6090,12 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5745,12 +6103,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5767,12 +6126,13 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef @@ -5785,11 +6145,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5804,11 +6165,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, 
s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5816,12 +6178,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5836,12 +6199,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5849,11 +6213,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5868,11 +6233,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -5880,12 +6246,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5900,12 +6267,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = alloca { i8, i32 }, align 4, addrspace(5) @@ -5921,11 +6289,12 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -5944,16 +6313,17 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_byte v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v1, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: s_addk_i32 s32, 0xf800 +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -5961,12 +6331,13 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5986,30 +6357,31 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dword v[0:1], v1, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:16 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -6028,30 +6400,31 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-NEXT: scratch_load_b32 v1, off, s33 offset:12 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: s_addk_i32 s32, 0xffe0 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:16 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_addk_i32 s32, 0xffe0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:16 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -6071,19 +6444,19 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:16 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) @@ -6106,11 +6479,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-LABEL: test_call_external_void_func_v16i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword 
v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -6143,11 +6517,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -6155,12 +6530,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -6194,12 +6570,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6207,11 +6584,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -6242,11 +6620,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { 
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -6254,12 +6633,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -6293,12 +6673,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef @@ -6311,11 +6692,11 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-LABEL: tail_call_byval_align16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s6, s33 +; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 32 -; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s34, 0 ; GFX9-NEXT: v_writelane_b32 v40, s35, 1 @@ -6392,11 +6773,11 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: v_readlane_b32 s36, v40, 2 ; GFX9-NEXT: v_readlane_b32 
s35, v40, 1 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 -; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_addk_i32 s32, 0xf800 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -6404,12 +6785,12 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s6, s33 +; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_mov_b32 s6, s33 -; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_writelane_b32 v40, s34, 0 ; GFX10-NEXT: v_writelane_b32 v40, s35, 1 @@ -6487,12 +6868,12 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: v_readlane_b32 s36, v40, 2 ; GFX10-NEXT: v_readlane_b32 s35, v40, 1 ; GFX10-NEXT: v_readlane_b32 s34, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6500,11 +6881,11 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:24 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_mov_b32 s4, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:24 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: v_writelane_b32 v40, s34, 0 ; GFX11-NEXT: v_writelane_b32 v40, s35, 1 @@ -6579,11 +6960,11 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: v_readlane_b32 s36, v40, 2 ; GFX11-NEXT: v_readlane_b32 s35, v40, 1 ; GFX11-NEXT: v_readlane_b32 s34, v40, 0 -; GFX11-NEXT: s_addk_i32 s32, 0xffe0 -; GFX11-NEXT: s_mov_b32 s33, s4 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:24 ; 4-byte Folded Reload +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:24 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0xffe0 +; GFX11-NEXT: s_mov_b32 s33, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -6591,12 +6972,12 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 -; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 @@ -6671,12 +7052,12 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 -; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:24 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: @@ -6690,11 +7071,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -6706,11 +7088,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -6718,12 +7101,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; 
GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -6735,12 +7119,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6748,11 +7133,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -6764,11 +7150,12 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -6776,12 +7163,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -6793,12 +7181,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true) @@ -6809,11 +7198,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -6826,11 +7216,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -6838,12 +7229,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; 
GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -6856,12 +7248,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6869,11 +7262,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -6887,11 +7281,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -6899,12 +7294,13 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -6917,12 +7313,13 @@ define amdgpu_gfx 
void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123) @@ -6933,11 +7330,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -6950,11 +7348,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -6962,12 +7361,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -6980,12 +7380,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s30, 
v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6993,11 +7394,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -7011,11 +7413,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7023,12 +7426,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -7041,12 +7445,13 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123) @@ -7057,11 +7462,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -7074,11 +7480,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7086,12 +7493,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -7104,12 +7512,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 
+; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7117,11 +7526,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -7135,11 +7545,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7147,12 +7558,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -7165,12 +7577,13 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, 
s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42) @@ -7181,11 +7594,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -7201,11 +7615,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7213,12 +7628,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -7234,12 +7650,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: 
s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7247,11 +7664,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -7268,11 +7686,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7280,12 +7699,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -7301,12 +7721,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; 
GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123) @@ -7317,11 +7738,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -7341,11 +7763,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7353,12 +7776,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -7378,12 +7802,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7391,11 +7816,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -7416,11 +7842,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7428,12 +7855,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -7453,12 +7881,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i64>, <2 x i64> addrspace(4)* null @@ -7470,11 +7899,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -7496,11 +7926,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7508,12 +7939,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -7535,12 +7967,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7548,11 +7981,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 
s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -7575,11 +8009,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7587,12 +8022,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -7614,12 +8050,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg ) @@ -7630,11 +8067,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i64_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 8 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 
s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 8 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -7660,11 +8098,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 8 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 8 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7672,12 +8111,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 8 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -7703,12 +8143,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 8 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 8 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7716,11 +8157,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 8 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 8 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -7747,11 +8189,12 @@ define amdgpu_gfx 
void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 8 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 8 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7759,12 +8202,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -7790,12 +8234,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x i64>, <2 x i64> addrspace(4)* null @@ -7809,11 +8254,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i64_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 10 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 10 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -7845,11 +8291,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 10 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 10 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -7857,12 +8304,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 10 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -7894,12 +8342,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 10 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7907,11 +8356,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 10 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 10 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -7944,11 +8394,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 10 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 
+; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 10 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -7956,12 +8407,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -7993,12 +8445,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x i64>, <2 x i64> addrspace(4)* null @@ -8011,11 +8464,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -8028,11 +8482,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8040,12 +8495,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -8058,12 +8514,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8071,11 +8528,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -8089,11 +8547,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: 
s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8101,12 +8560,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -8119,12 +8579,13 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0) @@ -8135,11 +8596,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -8152,11 +8614,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; 
GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8164,12 +8627,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -8182,12 +8646,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8195,11 +8660,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -8213,11 +8679,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8225,12 +8692,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -8243,12 +8711,13 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0) @@ -8259,11 +8728,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -8279,11 +8749,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8291,12 +8762,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8312,12 +8784,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8325,11 +8798,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -8346,11 +8820,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8358,12 +8833,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -8379,12 +8855,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg ) @@ -8395,11 +8872,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 5 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 5 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -8418,11 +8896,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 5 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 5 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8430,12 +8909,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 5 +; 
GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 5 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8454,12 +8934,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 5 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 5 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8467,11 +8948,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 5 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 5 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -8491,11 +8973,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 5 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 5 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8503,12 +8986,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: 
s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -8527,12 +9011,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg ) @@ -8543,11 +9028,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 7 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 7 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -8572,11 +9058,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 7 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 7 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8584,12 +9071,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 7 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: 
v_writelane_b32 v40, s34, 7 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8614,12 +9102,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 7 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 7 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8627,11 +9116,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 7 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 7 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -8657,11 +9147,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 7 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 7 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8669,12 +9160,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -8699,12 +9191,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg ) @@ -8715,11 +9208,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -8735,11 +9229,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8747,12 +9242,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8768,12 +9264,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, 
v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8781,11 +9278,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -8802,11 +9300,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8814,12 +9313,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -8835,12 +9335,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0) @@ -8851,11 +9352,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -8877,11 +9379,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -8889,12 +9392,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8916,12 +9420,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; 
GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8929,11 +9434,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -8956,11 +9462,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -8968,12 +9475,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -8995,12 +9503,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte 
Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg ) @@ -9011,11 +9520,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 8 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 8 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9043,11 +9553,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 8 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 8 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9055,12 +9566,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 8 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9088,12 +9600,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 8 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 8 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: 
s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9101,11 +9614,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 8 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 8 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9134,11 +9648,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 8 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 8 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9146,12 +9661,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9179,12 +9695,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 8 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; 
GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg ) @@ -9195,11 +9712,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -9212,11 +9730,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9224,12 +9743,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -9242,12 +9762,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9255,11 +9776,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -9273,11 +9795,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9285,12 +9808,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -9303,12 +9827,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i16>, <2 x i16> addrspace(4)* undef @@ -9320,11 +9845,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9339,11 +9865,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9351,12 +9878,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9371,12 +9899,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9384,11 +9913,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 
s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9404,11 +9934,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9416,12 +9947,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9436,12 +9968,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x i16>, <3 x i16> addrspace(4)* undef @@ -9453,11 +9986,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9472,11 +10006,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9484,12 +10019,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9504,12 +10040,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9517,11 +10054,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9537,11 +10075,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f16_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9549,12 +10088,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9569,12 +10109,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x half>, <3 x half> addrspace(4)* undef @@ -9586,11 +10127,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9606,11 +10148,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9618,12 +10161,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9639,12 +10183,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9652,11 +10197,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9673,11 +10219,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 
4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9685,12 +10232,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9706,12 +10254,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg ) @@ -9722,11 +10271,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9742,11 +10292,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9754,12 +10305,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9775,12 +10327,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9788,11 +10341,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9809,11 +10363,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 
exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9821,12 +10376,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9842,12 +10398,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg ) @@ -9858,11 +10415,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -9877,11 +10435,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte 
Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -9889,12 +10448,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -9909,12 +10469,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9922,11 +10483,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -9942,11 +10504,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -9954,12 +10517,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -9974,12 +10538,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i16>, <4 x i16> addrspace(4)* undef @@ -9991,11 +10556,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10011,11 +10577,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10023,12 +10590,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10044,12 +10612,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10057,11 +10626,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10078,11 +10648,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10090,12 +10661,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10111,12 +10683,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg ) @@ -10127,11 +10700,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -10144,11 +10718,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10156,12 +10731,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 
v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -10174,12 +10750,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10187,11 +10764,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -10205,11 +10783,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10217,12 +10796,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -10235,12 +10815,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x half>, <2 x half> addrspace(4)* undef @@ -10252,11 +10833,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10271,11 +10853,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10283,12 +10866,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 
v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10303,12 +10887,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10316,11 +10901,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10336,11 +10922,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10348,12 +10935,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10368,12 +10956,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i32>, <2 x i32> addrspace(4)* undef @@ -10385,11 +10974,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10405,11 +10995,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10417,12 +11008,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10438,12 +11030,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 
; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10451,11 +11044,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10472,11 +11066,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10484,12 +11079,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10505,12 +11101,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 
4 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg ) @@ -10521,11 +11118,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 5 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 5 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10544,11 +11142,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 5 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 5 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10556,12 +11155,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 5 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 5 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10580,12 +11180,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 5 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 
4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 5 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10593,11 +11194,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 5 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 5 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10617,11 +11219,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 5 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 5 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10629,12 +11232,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10653,12 +11257,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 5 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; 
GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg ) @@ -10669,11 +11274,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10695,11 +11301,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10707,12 +11314,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10734,12 +11342,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10747,11 +11356,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10774,11 +11384,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10786,12 +11397,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10813,12 +11425,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: 
s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg , i32 inreg 6) @@ -10829,11 +11442,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -10852,11 +11466,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -10864,12 +11479,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10888,12 +11504,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -10901,11 +11518,12 @@ define amdgpu_gfx 
void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10925,11 +11543,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -10937,12 +11556,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10961,12 +11581,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i32>, <4 x i32> addrspace(4)* undef @@ -10978,11 +11599,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 6 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -11004,11 +11626,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 6 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -11016,12 +11639,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 6 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11043,12 +11667,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -11056,11 +11681,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte 
Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 6 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11083,11 +11709,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 6 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 6 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -11095,12 +11722,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11122,12 +11750,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg ) @@ -11138,11 +11767,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 7 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 7 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -11167,11 +11797,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 7 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 7 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -11179,12 +11810,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 7 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 7 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11209,12 +11841,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 7 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 7 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -11222,11 +11855,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 7 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; 
GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 7 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11252,11 +11886,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 7 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 7 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -11264,12 +11899,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11294,12 +11930,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 7 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg ) @@ -11310,11 +11947,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 10 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 
4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 10 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -11343,11 +11981,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 10 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 10 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -11355,12 +11994,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 10 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11389,12 +12029,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 10 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -11402,11 +12043,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 10 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 10 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11436,11 +12078,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() 
#0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 10 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 10 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -11448,12 +12091,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11482,12 +12126,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef @@ -11500,11 +12145,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 10 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 10 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -11538,11 +12184,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 10 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 10 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -11550,12 +12197,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 10 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11589,12 +12237,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 10 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -11602,11 +12251,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 10 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 10 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11641,11 +12291,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 10 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, 
off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 10 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -11653,12 +12304,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11692,12 +12344,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 10 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg ) @@ -11708,11 +12361,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 18 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 18 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -11757,11 +12411,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 18 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; 
GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 18 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -11769,12 +12424,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 18 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11819,12 +12475,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 18 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -11832,11 +12489,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 18 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 18 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11882,11 +12540,12 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 18 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 18 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: 
s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -11894,12 +12553,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11944,12 +12604,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 18 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef @@ -11962,11 +12623,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 28 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 28 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -12055,11 +12717,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 28 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 28 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -12067,12 +12730,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 28 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -12162,12 +12826,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 28 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 28 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -12175,11 +12840,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 28 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 28 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12263,11 +12929,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 28 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 28 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -12275,12 +12942,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12366,12 +13034,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef @@ -12384,11 +13053,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 28 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 28 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -12482,11 +13152,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 28 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 28 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -12494,12 +13165,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 28 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -12594,12 +13266,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 28 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 28 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -12607,11 +13280,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 28 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 28 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12698,11 +13372,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 28 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 28 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -12710,12 +13385,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH: ; %bb.0: ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 
4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12806,12 +13482,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 28 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef @@ -12825,11 +13502,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -12845,11 +13523,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -12857,12 +13536,13 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -12879,12 +13559,13 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -12892,11 +13573,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:8 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -12909,11 +13591,12 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:8 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:8 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -12921,12 +13604,13 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:8 ; 4-byte Folded Spill -; 
GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -12939,12 +13623,13 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:8 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: @@ -12956,11 +13641,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-LABEL: stack_12xv3i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -13010,11 +13696,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -13022,12 +13709,13 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: 
v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -13077,12 +13765,13 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -13090,11 +13779,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -13124,11 +13814,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -13136,12 +13827,13 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -13188,12 +13880,13 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: @@ -13217,11 +13910,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-LABEL: stack_8xv5i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -13279,11 +13973,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -13291,12 +13986,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -13354,12 +14050,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -13367,11 +14064,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -13405,11 +14103,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -13417,12 +14116,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -13474,12 +14174,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; 
GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: @@ -13499,11 +14200,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-LABEL: stack_8xv5f32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -13561,11 +14263,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -13573,12 +14276,13 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -13636,12 +14340,13 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 
exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -13649,11 +14354,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -13693,11 +14399,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -13705,12 +14412,13 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill -; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -13762,12 +14470,13 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 -; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: 
s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 757d5522873fc..a41ee0f454a3b 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -9,11 +9,12 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 @@ -30,11 +31,12 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -42,12 +44,13 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 4 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -64,12 +67,13 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 
0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -77,11 +81,12 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 4 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -99,11 +104,12 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 4 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 4 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_void() @@ -116,7 +122,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX9-LABEL: void_func_void_clobber_s28_s29: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v0, s28, 0 @@ -133,7 +139,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX9-NEXT: v_readlane_b32 s31, v0, 3 ; GFX9-NEXT: v_readlane_b32 s29, v0, 1 ; GFX9-NEXT: v_readlane_b32 s28, v0, 0 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -143,7 +149,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 @@ -161,7 +167,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX10-NEXT: v_readlane_b32 s31, v0, 3 ; GFX10-NEXT: v_readlane_b32 s29, v0, 1 ; GFX10-NEXT: v_readlane_b32 s28, v0, 0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 @@ -173,7 +179,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v0, s28, 0 @@ -191,7 +197,7 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX11-NEXT: v_readlane_b32 s31, v0, 3 ; GFX11-NEXT: v_readlane_b32 s29, v0, 1 ; GFX11-NEXT: v_readlane_b32 s28, v0, 0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -211,11 +217,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX9-LABEL: test_call_void_func_void_mayclobber_s31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -235,11 +242,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -247,12 +255,13 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -272,12 +281,13 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 
4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -285,11 +295,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -309,11 +320,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %s31 = call i32 asm sideeffect "; def $0", "={s31}"() @@ -326,11 +338,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-LABEL: test_call_void_func_void_mayclobber_v31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 @@ -350,11 +363,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -362,12 +376,13 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 @@ -387,12 +402,13 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -400,11 +416,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:4 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 @@ -425,11 +442,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:4 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-NEXT: s_setpc_b64 s[30:31] %v31 = call i32 asm sideeffect "; def $0", "={v31}"() @@ -443,11 +461,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -460,18 +479,19 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -479,12 +499,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -497,19 +518,20 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: s_mov_b32 s33, s4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_mov_b32 s33, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: 
v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -517,11 +539,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -534,18 +557,19 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_mov_b32 s4, s33 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_mov_b32 s33, s4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: s_mov_b32 s33, s4 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s33 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %s33 = call i32 asm sideeffect "; def $0", "={s33}"() @@ -558,11 +582,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -575,18 +600,19 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: s_mov_b32 s34, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: 
v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -594,12 +620,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -612,19 +639,20 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: s_mov_b32 s34, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -632,11 +660,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -656,11 +685,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 
s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %s34 = call i32 asm sideeffect "; def $0", "={s34}"() @@ -673,11 +703,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_v40: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v41, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v41, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v41, s30, 0 @@ -695,11 +726,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 ; GFX9-NEXT: v_readlane_b32 s31, v41, 1 +; GFX9-NEXT: v_readlane_b32 s34, v41, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v41, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -707,12 +739,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v41, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v41, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 @@ -730,12 +763,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 ; GFX10-NEXT: v_readlane_b32 s31, v41, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v41, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: 
buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v41, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -743,11 +777,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:4 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v41, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v41, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v41, s30, 0 @@ -766,11 +801,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: v_readlane_b32 s30, v41, 0 ; GFX11-NEXT: v_readlane_b32 s31, v41, 1 +; GFX11-NEXT: v_readlane_b32 s0, v41, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v41, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:4 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %v40 = call i32 asm sideeffect "; def $0", "={v40}"() @@ -783,7 +819,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX9-LABEL: void_func_void_clobber_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v0, s33, 0 @@ -791,7 +827,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s33, v0, 0 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -801,7 +837,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 @@ -810,7 +846,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s33, v0, 0 -; 
GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 @@ -822,7 +858,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v0, s33, 0 @@ -831,7 +867,7 @@ define hidden void @void_func_void_clobber_s33() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s33, v0, 0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -845,7 +881,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX9-LABEL: void_func_void_clobber_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v0, s34, 0 @@ -853,7 +889,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s34, v0, 0 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -863,7 +899,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 @@ -872,7 +908,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s34, v0, 0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 @@ -884,7 +920,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v0, s34, 0 @@ -893,7 +929,7 @@ define hidden void @void_func_void_clobber_s34() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s34, v0, 0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -907,11 +943,12 @@ define amdgpu_gfx void 
@test_call_void_func_void_clobber_s33() #0 { ; GFX9-LABEL: test_call_void_func_void_clobber_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -921,11 +958,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -933,12 +971,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -948,12 +987,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -961,11 +1001,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 
exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -976,11 +1017,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @void_func_void_clobber_s33() @@ -991,11 +1033,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-LABEL: test_call_void_func_void_clobber_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 @@ -1005,11 +1048,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1017,12 +1061,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_writelane_b32 v40, 
s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1032,12 +1077,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1045,11 +1091,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1060,11 +1107,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @void_func_void_clobber_s34() @@ -1075,11 +1123,12 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-LABEL: callee_saved_sgpr_kernel: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -1098,11 +1147,12 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, 
off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1110,12 +1160,13 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -1134,12 +1185,13 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1147,11 +1199,12 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -1170,11 +1223,12 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %s40 = call i32 asm 
sideeffect "; def s40", "={s40}"() #0 @@ -1187,11 +1241,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 @@ -1219,11 +1274,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s34, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_mov_b32 s33, s34 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1231,12 +1287,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 3 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -1264,12 +1321,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s34, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s35, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1277,11 +1335,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:4 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 3 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 @@ -1310,11 +1369,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 +; GFX11-NEXT: v_readlane_b32 s0, v40, 3 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v40, 3 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:4 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 6670cc3c882b3..010151c505ad5 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -24,11 +24,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s36, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 @@ -40,11 +40,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s36 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -52,12 +52,12 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_mov_b32 s36, s33 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, 
off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 @@ -69,12 +69,12 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s36 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -82,11 +82,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v1, s33, 2 +; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 @@ -98,11 +98,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 -; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v1, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, -16 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -131,11 +131,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s36, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 @@ -147,11 +147,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword 
v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s36 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -159,12 +159,12 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_mov_b32 s36, s33 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 @@ -176,12 +176,12 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s36 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -189,11 +189,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v1, s33, 2 +; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 @@ -205,11 +205,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 -; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v1, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, -16 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -238,11 +238,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-LABEL: call_2xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s36, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; 
GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 @@ -254,11 +254,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s36 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -266,12 +266,12 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_mov_b32 s36, s33 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 @@ -283,12 +283,12 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s36 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -296,11 +296,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v1, s33, 2 +; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 @@ -312,11 +312,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 -; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v1, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_load_b32 v1, off, 
s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, -16 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -354,11 +354,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-LABEL: call_3xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v2, s33, 2 +; GFX9-NEXT: s_mov_b32 s36, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 @@ -370,11 +370,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v2, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s36 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -382,12 +382,12 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_mov_b32 s36, s33 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v2, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 @@ -399,12 +399,12 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v2, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: s_mov_b32 s33, s36 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -412,11 +412,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v2, s33, 2 +; GFX11-NEXT: s_mov_b32 s2, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: 
s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v2, s31, 1 @@ -428,11 +428,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: s_add_i32 s32, s32, -16 -; GFX11-NEXT: v_readlane_b32 s33, v2, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, -16 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1634,12 +1634,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-LABEL: call_512xi32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v2, s33, 2 +; GFX9-NEXT: s_mov_b32 s36, s33 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 @@ -1652,11 +1652,11 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 -; GFX9-NEXT: v_readlane_b32 s33, v2, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 +; GFX9-NEXT: s_mov_b32 s33, s36 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1664,13 +1664,13 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v2, s33, 2 +; GFX10-NEXT: s_mov_b32 s36, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 @@ -1683,12 +1683,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: s_add_i32 
s32, s32, 0xfffd0000 -; GFX10-NEXT: v_readlane_b32 s33, v2, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 +; GFX10-NEXT: s_mov_b32 s33, s36 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1696,13 +1696,13 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v5, s32 offset:2048 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v5, s33, 2 +; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x7ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffff800 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_addk_i32 s32, 0x1800 ; GFX11-NEXT: v_writelane_b32 v5, s31, 1 @@ -1715,11 +1715,11 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_readlane_b32 s30, v5, 0 ; GFX11-NEXT: v_readlane_b32 s31, v5, 1 -; GFX11-NEXT: s_addk_i32 s32, 0xe800 -; GFX11-NEXT: v_readlane_b32 s33, v5, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:2048 ; 4-byte Folded Reload +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: scratch_load_b32 v5, off, s33 offset:2048 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0xe800 +; GFX11-NEXT: s_mov_b32 s33, s34 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll index e48d4e5a0ee3d..633e7f5318eec 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll @@ -36,6 +36,7 @@ ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 +; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 0 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -64,6 +65,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() { ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 +; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -81,7 +83,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() { } ; GCN-LABEL: {{^}}queue_ptr: -; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1] +; ; GCN: global_load_u8 v{{[0-9]+}}, ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2 @@ -91,11 +93,12 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() { ; WORKAROUND: .amdhsa_user_sgpr_count 15 ; NOWORKAROUND: 
.amdhsa_user_sgpr_count 2 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 -; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1 -; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 +; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 0 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 @@ -117,16 +120,16 @@ define amdgpu_kernel void @queue_ptr() { ; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14 ; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15 -; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8 -; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9 -; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10 +; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6 +; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7 +; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1] +; GCN: global_load_u8 v{{[0-9]+}}, ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3] -; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5] -; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6 -; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7 +; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4 +; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off @@ -135,13 +138,14 @@ define amdgpu_kernel void @queue_ptr() { ; GCN: .amdhsa_kernel all_inputs ; WORKAROUND: .amdhsa_user_sgpr_count 13 -; NOWORKAROUND: .amdhsa_user_sgpr_count 8 +; NOWORKAROUND: .amdhsa_user_sgpr_count 6 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 -; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1 +; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0 ; GCN-NEXT: .amdhsa_wavefront_size32 +; GCN-NEXT: .amdhsa_uses_dynamic_stack 0 ; GCN-NEXT: .amdhsa_enable_private_segment 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 @@ -149,7 +153,7 @@ define amdgpu_kernel void @queue_ptr() { ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13 -; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8 +; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6 define amdgpu_kernel void @all_inputs() { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll index b88fabba5ed53..f061ab021368a 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s ; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { @@ -6,3 +6,6 @@ define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspa ret void } +!llvm.module.flags = !{!0} +!0 = 
!{i32 1, !"amdgpu_code_object_version", i32 200} + diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll index 3f7d87b98177d..295e4b5e1b407 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll @@ -9,23 +9,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1 ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret void @@ -36,23 +36,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrs ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret void @@ -63,23 +63,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrsp ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 
+ ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret void @@ -90,23 +90,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret void @@ -117,23 +117,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1 ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) 
on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret void @@ -144,23 +144,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrs ; GFX908_GFX11: bb.0 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908_GFX11-NEXT: {{ $}} - ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908_GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908_GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908_GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908_GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX908_GFX11-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store 
syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret void diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll index 5de0b4e87b966..05c2a0a9ff0da 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll @@ -8,25 +8,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, 
addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret float %ret @@ -37,25 +37,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspa ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float 
@llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret float %ret @@ -66,25 +66,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspac ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) ret float %ret @@ -95,25 +95,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float ad ; GFX90A_GFX940: bb.0 (%ir-block.0): ; 
GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) ret float %ret @@ -124,25 +124,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret float %ret @@ -153,25 +153,25 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspa ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + 
; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX11-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX11-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX11-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX11-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic ret float %ret diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll index 5b257350337a5..9b6c776c0711e 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll @@ -7,15 +7,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace( ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; 
GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret void @@ -26,19 +26,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; 
GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -49,15 +49,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addr ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret void @@ -68,19 +68,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrs ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -91,15 +91,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrs ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: 
(volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret void @@ -110,19 +110,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrsp ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: 
[[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -133,15 +133,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret void @@ -152,19 +152,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; 
GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) ret double %ret @@ -175,15 +175,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace( ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], 
%subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret void @@ -194,19 +194,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1 ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[PRED_COPY4]], killed [[PRED_COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY6]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY7]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret double %ret @@ -217,15 +217,15 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addr ; 
GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret void @@ -236,19 +236,19 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrs ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY3]], %subreg.sub0, [[PRED_COPY2]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY1]], %subreg.sub0, [[PRED_COPY]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: 
[[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[PRED_COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX940-NEXT: $sgpr0 = PRED_COPY [[PRED_COPY5]] + ; GFX90A_GFX940-NEXT: $sgpr1 = PRED_COPY [[PRED_COPY6]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll index 2118d982d0c26..4d1386c75f82d 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll @@ -8,23 +8,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> addr ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], 
%subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -35,23 +35,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; 
GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -62,23 +62,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]] + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[PRED_COPY3]], [[PRED_COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void @@ -89,23 +89,23 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX908-NEXT: {{ $}} - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; 
GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret void diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll index da891c88c0ae7..6784d68cb4b5c 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll @@ -7,13 +7,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> a ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = 
GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -24,13 +24,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x h ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -41,13 +41,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x ha ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, 
[[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[REG_SEQUENCE]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[PRED_COPY3]], [[PRED_COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret @@ -58,13 +58,13 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(< ; GFX90A_GFX940: bb.0 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 + ; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr1 + ; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr0 + ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1 ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[PRED_COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX940-NEXT: $vgpr0 = PRED_COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) ret <2 x half> %ret diff --git a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir index 2e94de93a5176..4900e553d98f0 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir @@ -133,14 +133,14 @@ body: | ; CHECK-NEXT: S_NOP 0 ; 
CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY %31 ; CHECK-NEXT: S_NOP 0, implicit %31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY %29 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY %29 ; CHECK-NEXT: S_NOP 0, implicit %29 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128 = COPY %27 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_128 = PRED_COPY %27 ; CHECK-NEXT: S_NOP 0, implicit %27 ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]] ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE1]] ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE2]] @@ -156,10 +156,10 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit %0 ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE6]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY3]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY2]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY1]] - ; CHECK-NEXT: S_NOP 0, implicit [[COPY]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY3]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY2]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY]] bb.0: S_NOP 0, implicit-def %0:vreg_128 S_NOP 0, implicit-def %1:vreg_128 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir index f4e2a4991bd0f..d6b68a4f875b6 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir @@ -23,12 +23,12 @@ body: | ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; CHECK-NEXT: undef %9.sub1:vreg_64 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %9.sub1 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %11.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 + ; CHECK-NEXT: undef %11.sub0:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE]].sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %11.sub0 - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 + ; CHECK-NEXT: undef %7.sub1:vreg_64 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %7.sub1 ; 
CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) @@ -67,17 +67,17 @@ body: | ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %13.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 + ; CHECK-NEXT: undef %13.sub0:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE]].sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def %13.sub1 - ; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %13.sub0 + ; CHECK-NEXT: undef %15.sub0:vreg_64 = PRED_COPY %13.sub0, implicit $exec ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1 + ; CHECK-NEXT: undef %7.sub1:vreg_64 = PRED_COPY [[SI_SPILL_V64_RESTORE1]].sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def %7.sub0 - ; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %7.sub1 + ; CHECK-NEXT: undef %9.sub1:vreg_64 = PRED_COPY %7.sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: undef %14.sub0:vreg_64 = COPY %15.sub0 + ; CHECK-NEXT: undef %14.sub0:vreg_64 = PRED_COPY %15.sub0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %14.sub0 - ; CHECK-NEXT: undef %8.sub1:vreg_64 = COPY %9.sub1 + ; CHECK-NEXT: undef %8.sub1:vreg_64 = PRED_COPY %9.sub1, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit %8.sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir b/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir index 9c2b5ac92eb8f..74b9a25bd0a64 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir @@ -32,7 +32,7 @@ body: | %16.sub5:sgpr_256 = COPY %16.sub0 %16.sub6:sgpr_256 = COPY %16.sub0 %16.sub7:sgpr_256 = COPY %16.sub0 - IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource") + IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7) S_CBRANCH_SCC1 %bb.2, implicit undef $scc S_BRANCH %bb.1 @@ -41,7 +41,7 @@ body: | %30.sub2:vreg_128 = COPY %30.sub1 %30.sub3:vreg_128 = COPY %30.sub1 %26.sub1:vreg_64 = COPY %30.sub1 - IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource") + IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7) bb.2: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir index e95e66d53d623..98f95e1cc4749 100644 --- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir +++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir @@ -382,7 +382,7 @@ body: | ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) - ; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; CHECK-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ; GFX11-LABEL: name: mimg_nsa_mixed ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 @@ -390,11 +390,11 @@ body: | ; GFX11-NEXT: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 { ; GFX11-NEXT: S_CLAUSE 2 ; GFX11-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) - ; GFX11-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GFX11-NEXT: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GFX11-NEXT: 
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ; GFX11-NEXT: } $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) - $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ... diff --git a/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info-regallocfast.ll b/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info-regallocfast.ll index ce3123b0f2d3a..53e53de1a0c22 100644 --- a/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info-regallocfast.ll +++ b/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info-regallocfast.ll @@ -18,7 +18,7 @@ define dso_local i32 @single_block_referrer_live_until_terminator() #1 !dbg !5 { ; CHECK-NEXT: DBG_DEF !8, renamable $sgpr4 ; CHECK-NEXT: INLINEASM &"S_NOP 1", 1 /* sideeffect attdialect */ ; CHECK-NEXT: INLINEASM &"S_NOP 1", 1 /* sideeffect attdialect */ - ; CHECK-NEXT: $vgpr0 = COPY killed renamable $sgpr4, debug-location !11 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed renamable $sgpr4, debug-location !11 ; CHECK-NEXT: DBG_KILL !8 ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0, debug-location !11 entry: @@ -38,7 +38,7 @@ define dso_local i32 @single_block_referrer_live_until_kill() #1 !dbg !12 { ; CHECK-NEXT: INLINEASM &"S_NOP 1", 1 /* sideeffect attdialect */ ; CHECK-NEXT: DBG_KILL !13 ; CHECK-NEXT: INLINEASM &"S_NOP 1", 1 /* sideeffect attdialect */ - ; CHECK-NEXT: $vgpr0 = COPY killed renamable $sgpr4, debug-location !15 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed renamable $sgpr4, debug-location !15 ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0, debug-location !15 entry: %0 = call i32 asm sideeffect "S_NOP 1 ; def $0", "=r"() diff --git a/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info.ll b/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info.ll index efef16919fb04..93cde68738e28 100644 --- a/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info.ll +++ b/llvm/test/CodeGen/AMDGPU/heterogeneous-debug-info.ll @@ -26,9 +26,9 @@ ; COMMON: {{^$}} ; AFTER-ISEL-NOT: DBG_ -; AFTER-ISEL: %[[#ARG_0_COPY_VREG:]]:vgpr_32 = COPY $vgpr0 +; AFTER-ISEL: %[[#ARG_0_PRED_COPY_VREG:]]:vgpr_32 = PRED_COPY $vgpr0 ; AFTER-ISEL-NOT: DBG_ -; AFTER-ISEL: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], %[[#ARG_0_COPY_VREG]] +; AFTER-ISEL: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], %[[#ARG_0_PRED_COPY_VREG]] ; AFTER-ISEL-NOT: DBG_ ; AFTER-ISEL: DBG_KILL ![[ENTRY_LIFETIME_VAR_I]] ; AFTER-ISEL-NOT: DBG_ @@ -38,9 +38,9 @@ ; AFTER-ISEL-NOT: DBG_ ; AFTER-RA-NOT: DBG_ -; AFTER-RA: renamable $vgpr[[#ARG_0_COPY_VGPR:]] = COPY killed 
$vgpr0 +; AFTER-RA: renamable $vgpr[[#ARG_0_PRED_COPY_VGPR:]] = PRED_COPY killed $vgpr0 ; AFTER-RA-NOT: DBG_ -; AFTER-RA: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_COPY_VGPR]] +; AFTER-RA: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_PRED_COPY_VGPR]] ; AFTER-RA-NOT: DBG_ ; AFTER-RA: DBG_KILL ![[ENTRY_LIFETIME_VAR_I]] ; AFTER-RA-NOT: DBG_ @@ -50,9 +50,9 @@ ; AFTER-RA-NOT: DBG_ ; AFTER-PEI-NOT: DBG_ -; AFTER-PEI: renamable $vgpr[[#ARG_0_COPY_VGPR:]] = COPY killed $vgpr0 +; AFTER-PEI: renamable $vgpr[[#ARG_0_PRED_COPY_VGPR:]] = PRED_COPY killed $vgpr0 ; AFTER-PEI-NOT: DBG_ -; AFTER-PEI: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_COPY_VGPR]] +; AFTER-PEI: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_PRED_COPY_VGPR]] ; AFTER-PEI-NOT: DBG_ ; AFTER-PEI: DBG_KILL ![[ENTRY_LIFETIME_VAR_I]] ; AFTER-PEI-NOT: DBG_ @@ -62,9 +62,9 @@ ; AFTER-PEI-NOT: DBG_ ; AFTER-LDV-NOT: DBG_ -; AFTER-LDV: $vgpr[[#ARG_0_COPY_VGPR:]] = V_MOV_B32_e32 killed $vgpr0, +; AFTER-LDV: $vgpr[[#ARG_0_PRED_COPY_VGPR:]] = V_MOV_B32_e32 killed $vgpr0, ; AFTER-LDV-NOT: DBG_ -; AFTER-LDV: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_COPY_VGPR]] +; AFTER-LDV: DBG_DEF ![[ENTRY_LIFETIME_VAR_I]], renamable $vgpr[[#ARG_0_PRED_COPY_VGPR]] ; AFTER-LDV-NOT: DBG_ ; AFTER-LDV: DBG_KILL ![[ENTRY_LIFETIME_VAR_I]] ; AFTER-LDV-NOT: DBG_ diff --git a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll index 5d80e99d1b9d9..ccf4c19737d9c 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s ; Make sure that with an HSA triple, we don't default to an ; unsupported device. 
@@ -9,3 +9,5 @@ define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspa ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll index fa0f5894cd5b2..0bf39f8f2c42a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: ; GCN: float_mode = 240 @@ -99,3 +99,6 @@ attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" } attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" } attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll index 524668db50517..efab170280195 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | FileCheck --check-prefix=HSA-CI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri | llvm-readobj --symbols -S --sd - | FileCheck %s --check-prefix=ELF ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. 
@@ -67,3 +67,6 @@ entry: store i32 0, i32 addrspace(1)* %out ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll index da46f0e91164c..bbbd2eb5f3822 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; CHECK: - Name: test_ro_arg ; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' @@ -30,3 +30,6 @@ define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, !1 = !{!"none", !"none"} !2 = !{!"float*", !"float*"} !3 = !{!"const restrict", !""} + +!llvm.module.flags = !{!99} +!99 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll index c223d661ac7c8..1eee6adec7b94 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -76,6 +76,9 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll index 29da063b6de47..eca7a0ba15a19 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] @@ -78,6 +78,9 @@ define amdgpu_kernel void 
@test_enqueue_kernel_caller(i8 %a) #1 attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll index 83ddad21bf17d..9bb1026f2a77a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s @llvm.global_ctors = appending addrspace(1) global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 1, void ()* @foo.5, i8* null }] @@ -37,3 +37,6 @@ define internal void @bar.5() { ; CHECK: .name: amdgcn.device.fini ; PARSER: AMDGPU HSA Metadata Parser Test: PASS + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll index adae3b363746d..8dec1d296f6dc 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1,9 +1,9 
@@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -1745,6 +1745,9 @@ attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index f19fbc14cc554..0188c8b33d00f 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 
| FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -1870,6 +1870,9 @@ attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll index ec883d860d042..62d74dcba2313 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -299,3 +299,6 @@ define amdgpu_kernel void @test_kernel72() #2 { attributes #0 = { "amdgpu-no-heap-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll index 759f50cbbe626..be907405d665e 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | 
llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -296,3 +296,6 @@ attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" } attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll index ebe46ffa45af3..409d04948cd81 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll @@ -1,10 +1,10 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s ; CHECK: amdhsa.kernels: @@ -109,8 +109,9 @@ entry: ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} !llvm.printf.fmts = !{!1, !2} - !1 = !{!"1:1:4:%d\5Cn"} !2 = !{!"2:1:8:%g\5Cn"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll index cae375da0e089..1875ec38b0bd3 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll @@ -1,6 +1,6 @@ 
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] @@ -308,3 +308,6 @@ attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" } attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll index 99b24bd671553..a9529c392cb48 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck %s ; CHECK: --- ; CHECK: amdhsa.kernels: @@ -39,6 +39,8 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0 attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"char"} @@ -46,5 +48,3 @@ attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" } !opencl.ocl.version = !{!90} !90 = !{i32 2, i32 0} - -; CHECK: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll index 734ad53e9be0d..67878b49340d5 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -301,3 +301,6 @@ attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } 
attributes #4 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll index a832ca1d60aa4..54c2fbeb5a64e 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -299,3 +299,6 @@ define amdgpu_kernel void @test_kernel72() #2 { attributes #0 = { "amdgpu-no-hostcall-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll index 56477008006b4..9e65c4fee186a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque @@ -98,6 +98,9 @@ define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} + !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll index 45b3a77417e13..1b87221ddc367 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
--amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque @@ -86,6 +86,8 @@ define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, ret void } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll index 3b6fff3a681fa..8117037baaffc 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,3 +9,5 @@ ; CHECK: ... !opencl.ocl.version = !{} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll index c9d0742a00e96..674e7ef6b72a9 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,3 +7,5 @@ ; CHECK: ... !opencl.ocl.version = !{} +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll index 241979860dc1e..6c3a9693cee10 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,4 +9,6 @@ ; CHECK: ... 
!opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll index 4b3c4ab4844fd..53750be092444 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,4 +7,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll index 89191e6dfa1ae..ea744863a9b88 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -9,4 +9,6 @@ ; CHECK: ... !opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{i32 1} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll index 25daf1fff26ec..36a79648dd525 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. @@ -7,4 +7,6 @@ ; CHECK: ... 
!opencl.ocl.version = !{!0} +!llvm.module.flags = !{!1} !0 = !{i32 1} +!1 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll index 0c5555547eec4..522f0f2a21f08 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=3 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900,WAVE64 %s -; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX1010,WAVE32 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900,WAVE64 %s +; RUN: llc -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX1010,WAVE32 %s @var = addrspace(1) global float 0.0 @@ -163,3 +163,6 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 { attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index e10f96072e254..5401cdd1d357b 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX700 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-xnack 
-enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX803 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX900 %s @var = addrspace(1) global float 0.0 @@ -168,3 +168,6 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 { attributes #0 = { "amdgpu-num-sgpr"="14" } attributes #1 = { "amdgpu-num-vgpr"="20" } attributes #2 = { "amdgpu-flat-work-group-size"="1,256" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll index 58479db645b1d..f1acf035e9fa6 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -299,3 +299,6 @@ define amdgpu_kernel void @test_kernel72() #2 { attributes #0 = { "amdgpu-no-multigrid-sync-arg" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll index 6e46dba419b8d..d66e45be169a1 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll @@ -1,10 +1,10 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck 
--check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s ; On gfx8, the queue ptr is required for this addrspacecast. @@ -76,3 +76,6 @@ declare i1 @llvm.amdgcn.is.shared(i8*) declare i1 @llvm.amdgcn.is.private(i8*) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll index b2230b4a8321f..8919676163645 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s declare void @function1() @@ -299,3 +299,6 @@ define amdgpu_kernel void @test_kernel72() #2 { attributes #0 = { "amdgpu-no-queue-ptr" } attributes #1 = { nounwind readnone speculatable willreturn } attributes #2 = { noinline } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll index cf874622eca3c..7a27993c41b98 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll @@ -1,10 +1,10 @@ ; Note: uses a randomly selected assumed external call stack size so that the ; test assertions are unlikely to succeed by accident. 
-; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s -; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=4 -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s +; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s ; CHECK-LABEL: amdhsa.kernels @@ -135,3 +135,6 @@ define amdgpu_kernel void @test4() { } attributes #0 = { norecurse } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll index 83b0b2c9352d2..2f55a384eb644 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,38 +1,38 @@ -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI600 %s -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI601 %s -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx602 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=NONHSA-SI602 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI702 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins --amdhsa-code-object-version=2 | FileCheck 
--check-prefixes=HSA,HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI704 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-CI705 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI805 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 --amdhsa-code-object-version=2 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx600 | FileCheck --check-prefixes=NONHSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx601 | FileCheck --check-prefixes=NONHSA-SI601 %s +; RUN: llc < %s 
-mtriple=amdgcn-- -mcpu=gfx602 | FileCheck --check-prefixes=NONHSA-SI602 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefixes=HSA,HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefixes=HSA,HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefixes=HSA,HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefixes=HSA,HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefixes=HSA,HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx705 | FileCheck --check-prefixes=HSA,HSA-CI705 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=iceland -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefixes=HSA,HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx805 | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tongapro | FileCheck --check-prefixes=HSA,HSA-VI805 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=stoney | FileCheck --check-prefixes=HSA,HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=HSA,HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefixes=HSA,HSA-GFX903 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX904 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefixes=HSA,HSA-GFX905 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefixes=HSA,HSA-GFX906 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=HSA,HSA-GFX907 %s ; HSA: .hsa_code_object_version 2,1 ; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx600" @@ -57,3 
+57,6 @@ ; HSA-GFX905: .hsa_code_object_isa 9,0,5,"AMD","AMDGPU" ; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU" ; HSA-GFX907: .hsa_code_object_isa 9,0,7,"AMD","AMDGPU" + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index 61672ef1b9adb..b01e0cd6caf46 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -1,13 +1,14 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s + +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 
--check-prefix=GFX10-W32 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. @@ -71,9 +72,9 @@ ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple -define amdgpu_kernel void @simple(i32 addrspace(1)* %out) { +define amdgpu_kernel void @simple(ptr addrspace(1) %out) { entry: - store i32 0, i32 addrspace(1)* %out + store i32 0, ptr addrspace(1) %out ret void } @@ -81,6 +82,9 @@ entry: ; HSA: enable_sgpr_kernarg_segment_ptr = 0 define amdgpu_kernel void @simple_no_kernargs() { entry: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir index 2213ed006df41..8f2a08eac75c1 100644 --- a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir +++ b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir @@ -36,7 +36,7 @@ body: | ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI %15, %bb.6 ; GCN-NEXT: SI_END_CF [[PHI]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec ; GCN-NEXT: S_BRANCH %bb.5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: @@ -45,8 +45,8 @@ body: | ; GCN-NEXT: ATOMIC_FENCE 5, 2 ; GCN-NEXT: S_BARRIER ; GCN-NEXT: ATOMIC_FENCE 4, 2 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY %18 - ; GCN-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[COPY6]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY %18 + ; GCN-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[COPY5]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: @@ -57,7 +57,7 @@ body: | ; GCN-NEXT: bb.5: ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, [[COPY5]], %bb.2 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, [[PRED_COPY]], %bb.2 ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} @@ -65,8 +65,8 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_1]], %bb.1, %15, %bb.6 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[COPY4]] - ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[COPY7]], [[PHI2]], implicit-def dead $scc + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[COPY4]] + ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[COPY6]], [[PHI2]], implicit-def dead $scc ; GCN-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll index 2781d1e1b5213..1c1332a53b594 100644 --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -25,21 +25,25 @@ define amdgpu_kernel void 
@udiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX9-NEXT: s_mul_i32 s7, s3, s6 ; GFX9-NEXT: s_mul_hi_u32 s6, s2, s6 ; GFX9-NEXT: s_add_i32 s6, s6, s7 -; GFX9-NEXT: s_not_b32 s9, s6 ; GFX9-NEXT: s_mul_i32 s7, s5, s6 -; GFX9-NEXT: s_mul_i32 s9, s4, s9 -; GFX9-NEXT: s_add_i32 s8, s6, 1 ; GFX9-NEXT: s_add_i32 s7, s2, s7 -; GFX9-NEXT: s_add_i32 s9, s2, s9 ; GFX9-NEXT: s_cmp_ge_u32 s7, s4 -; GFX9-NEXT: s_cselect_b32 s6, s8, s6 -; GFX9-NEXT: s_cselect_b32 s7, s9, s7 -; GFX9-NEXT: s_add_i32 s8, s6, 1 -; GFX9-NEXT: s_cmp_ge_u32 s7, s4 -; GFX9-NEXT: s_cselect_b32 s6, s8, s6 -; GFX9-NEXT: s_add_u32 s2, s2, 1 ; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: s_add_i32 s7, s6, 1 +; GFX9-NEXT: s_not_b32 s6, s6 +; GFX9-NEXT: s_mul_i32 s6, s4, s6 +; GFX9-NEXT: v_mov_b32_e32 v4, s7 +; GFX9-NEXT: s_add_i32 s6, s2, s6 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v5, 1, v2 +; GFX9-NEXT: s_add_u32 s2, s2, 1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v3 ; GFX9-NEXT: s_addc_u32 s3, s3, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc ; GFX9-NEXT: global_store_dword v1, v2, s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, 4 ; GFX9-NEXT: s_addc_u32 s1, s1, 0 @@ -69,21 +73,23 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX10-NEXT: s_mul_i32 s7, s3, s6 ; GFX10-NEXT: s_mul_hi_u32 s6, s2, s6 ; GFX10-NEXT: s_add_i32 s6, s6, s7 -; GFX10-NEXT: s_not_b32 s8, s6 ; GFX10-NEXT: s_mul_i32 s7, s5, s6 -; GFX10-NEXT: s_mul_i32 s8, s4, s8 ; GFX10-NEXT: s_add_i32 s7, s2, s7 -; GFX10-NEXT: s_add_i32 s9, s6, 1 -; GFX10-NEXT: s_add_i32 s8, s2, s8 ; GFX10-NEXT: s_cmp_ge_u32 s7, s4 -; GFX10-NEXT: s_cselect_b32 s6, s9, s6 -; GFX10-NEXT: s_cselect_b32 s7, s8, s7 +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX10-NEXT: s_add_i32 s8, s6, 1 -; GFX10-NEXT: s_cmp_ge_u32 s7, s4 -; GFX10-NEXT: s_cselect_b32 s6, s8, s6 +; GFX10-NEXT: s_not_b32 s9, s6 +; GFX10-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-NEXT: s_mul_i32 s8, s4, s9 +; GFX10-NEXT: s_add_i32 s8, s2, s8 ; GFX10-NEXT: s_add_u32 s2, s2, 1 -; GFX10-NEXT: v_mov_b32_e32 v2, s6 +; GFX10-NEXT: v_mov_b32_e32 v3, s8 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s6, v2, vcc_lo ; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, s7, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s4, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo ; GFX10-NEXT: global_store_dword v1, v2, s[0:1] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_add_u32 s0, s0, 4 @@ -121,22 +127,26 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX11-NEXT: s_mul_i32 s7, s3, s6 ; GFX11-NEXT: s_mul_hi_u32 s6, s2, s6 ; GFX11-NEXT: s_add_i32 s6, s6, s7 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_not_b32 s8, s6 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_mul_i32 s7, s5, s6 -; GFX11-NEXT: s_mul_i32 s8, s4, s8 ; GFX11-NEXT: s_add_i32 s7, s2, s7 -; GFX11-NEXT: s_add_i32 s9, s6, 1 -; GFX11-NEXT: s_add_i32 s8, s2, s8 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_cmp_ge_u32 s7, s4 -; GFX11-NEXT: s_cselect_b32 s6, s9, s6 -; GFX11-NEXT: s_cselect_b32 s7, s8, s7 +; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX11-NEXT: s_add_i32 s8, s6, 1 -; GFX11-NEXT: s_cmp_ge_u32 s7, s4 -; GFX11-NEXT: s_cselect_b32 s6, s8, s6 +; GFX11-NEXT: 
s_not_b32 s9, s6 +; GFX11-NEXT: v_mov_b32_e32 v2, s8 +; GFX11-NEXT: s_mul_i32 s8, s4, s9 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-NEXT: s_add_i32 s8, s2, s8 ; GFX11-NEXT: s_add_u32 s2, s2, 1 -; GFX11-NEXT: v_mov_b32_e32 v2, s6 +; GFX11-NEXT: v_mov_b32_e32 v3, s8 +; GFX11-NEXT: v_cndmask_b32_e32 v2, s6, v2, vcc_lo ; GFX11-NEXT: s_addc_u32 s3, s3, 0 +; GFX11-NEXT: v_dual_cndmask_b32 v3, s7, v3 :: v_dual_add_nc_u32 v4, 1, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, s4, v3 +; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo ; GFX11-NEXT: global_store_b32 v1, v2, s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, 4 ; GFX11-NEXT: s_addc_u32 s1, s1, 0 @@ -320,39 +330,38 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX9-LABEL: sdiv32_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s3, s[0:1], 0x2c -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_ashr_i32 s2, s3, 31 ; GFX9-NEXT: s_add_i32 s3, s3, s2 ; GFX9-NEXT: s_xor_b32 s3, s3, s2 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX9-NEXT: s_sub_i32 s5, 0, s3 +; GFX9-NEXT: s_sub_i32 s4, 0, s3 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s4, v0 +; GFX9-NEXT: s_mov_b32 s4, 0 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: .LBB2_1: ; %bb3 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_readfirstlane_b32 s6, v0 -; GFX9-NEXT: s_mul_i32 s7, s5, s6 -; GFX9-NEXT: s_mul_hi_u32 s7, s6, s7 -; GFX9-NEXT: s_add_i32 s6, s6, s7 -; GFX9-NEXT: s_mul_hi_u32 s6, s4, s6 -; GFX9-NEXT: s_mul_i32 s7, s6, s3 -; GFX9-NEXT: s_sub_i32 s7, s4, s7 -; GFX9-NEXT: s_add_i32 s8, s6, 1 -; GFX9-NEXT: s_sub_i32 s9, s7, s3 -; GFX9-NEXT: s_cmp_ge_u32 s7, s3 -; GFX9-NEXT: s_cselect_b32 s6, s8, s6 -; GFX9-NEXT: s_cselect_b32 s7, s9, s7 -; GFX9-NEXT: s_add_i32 s8, s6, 1 -; GFX9-NEXT: s_cmp_ge_u32 s7, s3 -; GFX9-NEXT: s_cselect_b32 s6, s8, s6 -; GFX9-NEXT: s_xor_b32 s6, s6, s2 -; GFX9-NEXT: s_sub_i32 s6, s6, s2 +; GFX9-NEXT: v_mul_hi_u32 v2, s4, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, v2, s3 +; GFX9-NEXT: v_add_u32_e32 v4, 1, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v2, s2, v2 ; GFX9-NEXT: s_add_i32 s4, s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_subrev_u32_e32 v2, s2, v2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v2, s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, 4 ; GFX9-NEXT: s_addc_u32 s1, s1, 0 @@ -384,19 +393,21 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX10-NEXT: s_add_i32 s6, s6, s7 ; GFX10-NEXT: s_mul_hi_u32 s6, s4, s6 ; GFX10-NEXT: s_mul_i32 s7, s6, s3 -; GFX10-NEXT: s_add_i32 s8, s6, 1 ; GFX10-NEXT: s_sub_i32 s7, s4, s7 -; GFX10-NEXT: s_sub_i32 s9, s7, s3 ; GFX10-NEXT: s_cmp_ge_u32 s7, s3 -; GFX10-NEXT: s_cselect_b32 s6, s8, s6 -; 
GFX10-NEXT: s_cselect_b32 s7, s9, s7 +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX10-NEXT: s_add_i32 s8, s6, 1 -; GFX10-NEXT: s_cmp_ge_u32 s7, s3 -; GFX10-NEXT: s_cselect_b32 s6, s8, s6 ; GFX10-NEXT: s_add_i32 s4, s4, 1 -; GFX10-NEXT: s_xor_b32 s6, s6, s2 -; GFX10-NEXT: s_sub_i32 s6, s6, s2 -; GFX10-NEXT: v_mov_b32_e32 v2, s6 +; GFX10-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-NEXT: s_sub_i32 s8, s7, s3 +; GFX10-NEXT: v_mov_b32_e32 v3, s8 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s6, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, s7, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v2, s2, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s2, v2 ; GFX10-NEXT: global_store_dword v1, v2, s[0:1] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_add_u32 s0, s0, 4 @@ -438,21 +449,25 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_mul_hi_u32 s6, s4, s6 ; GFX11-NEXT: s_mul_i32 s7, s6, s3 -; GFX11-NEXT: s_add_i32 s8, s6, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_sub_i32 s7, s4, s7 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_sub_i32 s9, s7, s3 ; GFX11-NEXT: s_cmp_ge_u32 s7, s3 -; GFX11-NEXT: s_cselect_b32 s6, s8, s6 -; GFX11-NEXT: s_cselect_b32 s7, s9, s7 +; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX11-NEXT: s_add_i32 s8, s6, 1 -; GFX11-NEXT: s_cmp_ge_u32 s7, s3 -; GFX11-NEXT: s_cselect_b32 s6, s8, s6 ; GFX11-NEXT: s_add_i32 s4, s4, 1 -; GFX11-NEXT: s_xor_b32 s6, s6, s2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_sub_i32 s6, s6, s2 -; GFX11-NEXT: v_mov_b32_e32 v2, s6 +; GFX11-NEXT: v_mov_b32_e32 v2, s8 +; GFX11-NEXT: s_sub_i32 s8, s7, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v3, s8 +; GFX11-NEXT: v_cndmask_b32_e32 v2, s6, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_dual_cndmask_b32 v3, s7, v3 :: v_dual_add_nc_u32 v4, 1, v2 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo +; GFX11-NEXT: v_xor_b32_e32 v2, s2, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, s2, v2 ; GFX11-NEXT: global_store_b32 v1, v2, s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, 4 ; GFX11-NEXT: s_addc_u32 s1, s1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll index 0221933acbb5d..0cdecbb3387f8 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn define amdgpu_kernel void @get_local_size_x(i16 
addrspace(1)* %out) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll index 1af5080038f63..660419f7dca5f 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) { ; GFX8V3-LABEL: addrspacecast: @@ -15,15 +15,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX8V3-NEXT: v_mov_b32_e32 v4, 1 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0 -; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 +; GFX8V3-NEXT: v_mov_b32_e32 v0, s3 +; GFX8V3-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX8V3-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V3-NEXT: v_mov_b32_e32 v1, s3 -; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0 -; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0 +; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1 +; GFX8V3-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX8V3-NEXT: v_mov_b32_e32 v2, s2 +; GFX8V3-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX8V3-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX8V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX8V3-NEXT: v_mov_b32_e32 v3, s0 +; GFX8V3-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX8V3-NEXT: flat_store_dword v[0:1], v4 ; GFX8V3-NEXT: s_waitcnt vmcnt(0) ; GFX8V3-NEXT: v_mov_b32_e32 v0, 2 @@ -38,15 +40,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX8V4-NEXT: v_mov_b32_e32 v4, 1 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V4-NEXT: s_cselect_b32 s3, s3, 0 -; GFX8V4-NEXT: s_cselect_b32 s0, s0, 0 -; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1 +; GFX8V4-NEXT: v_mov_b32_e32 v0, s3 +; GFX8V4-NEXT: s_cselect_b64 vcc, -1, 0 +; 
GFX8V4-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V4-NEXT: v_mov_b32_e32 v1, s3 -; GFX8V4-NEXT: s_cselect_b32 s0, s2, 0 -; GFX8V4-NEXT: s_cselect_b32 s1, s1, 0 +; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1 +; GFX8V4-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX8V4-NEXT: v_mov_b32_e32 v2, s2 +; GFX8V4-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX8V4-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX8V4-NEXT: v_mov_b32_e32 v2, s1 -; GFX8V4-NEXT: v_mov_b32_e32 v3, s0 +; GFX8V4-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX8V4-NEXT: flat_store_dword v[0:1], v4 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) ; GFX8V4-NEXT: v_mov_b32_e32 v0, 2 @@ -61,15 +65,17 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX8V5-NEXT: v_mov_b32_e32 v4, 1 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1 -; GFX8V5-NEXT: s_cselect_b32 s2, s2, 0 -; GFX8V5-NEXT: s_cselect_b32 s0, s0, 0 +; GFX8V5-NEXT: v_mov_b32_e32 v0, s2 +; GFX8V5-NEXT: v_mov_b32_e32 v2, s0 +; GFX8V5-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1 -; GFX8V5-NEXT: v_mov_b32_e32 v0, s0 -; GFX8V5-NEXT: v_mov_b32_e32 v1, s2 -; GFX8V5-NEXT: s_cselect_b32 s0, s3, 0 -; GFX8V5-NEXT: s_cselect_b32 s1, s1, 0 +; GFX8V5-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc +; GFX8V5-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8V5-NEXT: v_mov_b32_e32 v2, s3 +; GFX8V5-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX8V5-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX8V5-NEXT: v_mov_b32_e32 v2, s1 -; GFX8V5-NEXT: v_mov_b32_e32 v3, s0 +; GFX8V5-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX8V5-NEXT: flat_store_dword v[0:1], v4 ; GFX8V5-NEXT: s_waitcnt vmcnt(0) ; GFX8V5-NEXT: v_mov_b32_e32 v0, 2 @@ -82,20 +88,22 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9V3-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9V3-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9V3-NEXT: v_mov_b32_e32 v0, s2 ; GFX9V3-NEXT: v_mov_b32_e32 v4, 1 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0) ; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0 +; GFX9V3-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V3-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; GFX9V3-NEXT: v_mov_b32_e32 v0, s0 ; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16) -; GFX9V3-NEXT: s_cselect_b32 s2, s2, 0 ; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V3-NEXT: v_mov_b32_e32 v1, s2 -; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0 -; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0 +; GFX9V3-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9V3-NEXT: v_mov_b32_e32 v2, s0 +; GFX9V3-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V3-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX9V3-NEXT: v_mov_b32_e32 v2, s1 -; GFX9V3-NEXT: v_mov_b32_e32 v3, s0 +; GFX9V3-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX9V3-NEXT: flat_store_dword v[0:1], v4 ; GFX9V3-NEXT: s_waitcnt vmcnt(0) ; GFX9V3-NEXT: v_mov_b32_e32 v0, 2 @@ -108,20 +116,22 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9V4-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9V4-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9V4-NEXT: v_mov_b32_e32 v0, s2 ; GFX9V4-NEXT: v_mov_b32_e32 v4, 1 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0) ; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0 +; GFX9V4-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V4-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0 ; GFX9V4-NEXT: 
s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16) -; GFX9V4-NEXT: s_cselect_b32 s2, s2, 0 ; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V4-NEXT: v_mov_b32_e32 v1, s2 -; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0 -; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0 +; GFX9V4-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9V4-NEXT: v_mov_b32_e32 v2, s0 +; GFX9V4-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V4-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX9V4-NEXT: v_mov_b32_e32 v2, s1 -; GFX9V4-NEXT: v_mov_b32_e32 v3, s0 +; GFX9V4-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX9V4-NEXT: flat_store_dword v[0:1], v4 ; GFX9V4-NEXT: s_waitcnt vmcnt(0) ; GFX9V4-NEXT: v_mov_b32_e32 v0, 2 @@ -134,20 +144,22 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9V5-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9V5-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9V5-NEXT: v_mov_b32_e32 v0, s2 ; GFX9V5-NEXT: v_mov_b32_e32 v4, 1 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0) ; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1 -; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0 +; GFX9V5-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V5-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0 ; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16) -; GFX9V5-NEXT: s_cselect_b32 s2, s2, 0 ; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1 -; GFX9V5-NEXT: v_mov_b32_e32 v1, s2 -; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0 -; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0 +; GFX9V5-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9V5-NEXT: v_mov_b32_e32 v2, s0 +; GFX9V5-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9V5-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc ; GFX9V5-NEXT: v_mov_b32_e32 v2, s1 -; GFX9V5-NEXT: v_mov_b32_e32 v3, s0 +; GFX9V5-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX9V5-NEXT: flat_store_dword v[0:1], v4 ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: v_mov_b32_e32 v0, 2 @@ -462,9 +474,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V3: ; %bb.0: ; GFX9V3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V3-NEXT: s_waitcnt vmcnt(0) ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V3-NEXT: s_waitcnt vmcnt(0) ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; GFX9V3-NEXT: s_waitcnt vmcnt(0) @@ -481,9 +491,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V4: ; %bb.0: ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc -; GFX9V4-NEXT: s_waitcnt vmcnt(0) ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc -; GFX9V4-NEXT: s_waitcnt vmcnt(0) ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; GFX9V4-NEXT: s_waitcnt vmcnt(0) @@ -500,9 +508,7 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V5: ; %bb.0: ; GFX9V5-NEXT: v_mov_b32_e32 v2, 0 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[0:1] glc -; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc -; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc ; GFX9V5-NEXT: ; kill: killed $sgpr0_sgpr1 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -533,3 +539,6 @@ declare i1 @llvm.amdgcn.is.shared(i8*) declare i1 @llvm.amdgcn.is.private(i8*) declare void @llvm.trap() declare void @llvm.debugtrap() + +!llvm.module.flags = 
!{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll index aca0a07263442..ba0156213ae48 100644 --- a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll @@ -1,11 +1,11 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=CHECK %s ; CHECK-LABEL: test_unaligned_to_eight: ; CHECK: .amdhsa_kernarg_size 264 define amdgpu_kernel void @test_unaligned_to_eight(i32 %four) { - %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + store volatile ptr addrspace(4) %implicitarg.ptr, ptr addrspace(1) undef ret void } @@ -13,8 +13,8 @@ define amdgpu_kernel void @test_unaligned_to_eight(i32 %four) { ; CHECK-LABEL: test_aligned_to_eight: ; CHECK: .amdhsa_kernarg_size 264 define amdgpu_kernel void @test_aligned_to_eight(i64 %eight) { - %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + store volatile ptr addrspace(4) %implicitarg.ptr, ptr addrspace(1) undef ret void } @@ -55,4 +55,7 @@ define amdgpu_kernel void @test_aligned_to_eight(i64 %eight) { ; CHECK-NEXT: .kernarg_segment_size: 264 ; CHECK-LABEL: .name: test_aligned_to_eight -declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/attributor.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll similarity index 52% rename from llvm/test/CodeGen/AMDGPU/attributor.ll rename to llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll index b7a219958823a..0fba6abbc675b 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor.ll +++ b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll @@ -6,10 +6,12 @@ target triple = "amdgcn-amd-amdhsa" ; offsets of the phi cannot be determined, and hence the attirbutor assumes that ; hostcall is in use. +; CHECK-LABEL: amdhsa.kernels: ; CHECK: .value_kind: hidden_hostcall_buffer ; CHECK: .value_kind: hidden_multigrid_sync_arg +; CHECK-LABEL: .name: kernel_1 -define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) { +define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) { entry: %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() br i1 %cond, label %old, label %new @@ -35,6 +37,32 @@ join: ; preds = %new, %old ret void } +; The call to intrinsic implicitarg_ptr is combined with an offset produced by +; select'ing between two constants, before it is eventually used in a GEP to +; form the address of a load. This test ensures that AAPointerInfo can look +; through the select to maintain a set of indices, so that it can precisely +; determine that hostcall and other expensive implicit args are not in use. 
+ +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-NOT: hidden_multigrid_sync_arg +; CHECK-LABEL: .name: kernel_2 + +define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) { +entry: + %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %tmp5 = select i1 %cond, i64 12, i64 18 + %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %tmp5 + %tmp8 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)* + + ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr + %tmp9 = load i16, i16 addrspace(4)* %tmp8, align 2 + + %idx.ext = sext i16 %tmp9 to i64 + %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext + %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index 1944f813f74e9..2cc0fb90402b6 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -15,14 +15,14 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN-NEXT: {{ $}} ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1 - ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 + ; GCN-NEXT: renamable $sgpr6 = PRED_COPY renamable $sgpr1 + ; GCN-NEXT: renamable $sgpr0 = PRED_COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440 ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 -1 - ; GCN-NEXT: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr6 - ; GCN-NEXT: renamable $sgpr2 = COPY killed renamable $sgpr5 - ; GCN-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr4 + ; GCN-NEXT: undef renamable $sgpr0 = PRED_COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $sgpr1 = PRED_COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $sgpr2 = PRED_COPY killed renamable $sgpr5 + ; GCN-NEXT: renamable $sgpr3 = PRED_COPY killed renamable $sgpr4 ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 16 ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 15 @@ -40,38 +40,38 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN-NEXT: renamable $sgpr13 = S_MOV_B32 2 ; GCN-NEXT: renamable $sgpr14 = S_MOV_B32 1 ; GCN-NEXT: renamable $sgpr15 = S_MOV_B32 0 - ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15 - ; GCN-NEXT: renamable $vgpr30 = COPY killed renamable $sgpr14 - ; GCN-NEXT: renamable $vgpr29 = COPY killed renamable $sgpr13 - ; GCN-NEXT: renamable $vgpr28 = COPY killed renamable $sgpr12 - ; GCN-NEXT: renamable $vgpr27 = COPY killed renamable $sgpr11 - ; GCN-NEXT: renamable $vgpr26 = COPY killed renamable $sgpr10 - ; GCN-NEXT: renamable $vgpr25 = COPY killed renamable $sgpr9 - ; GCN-NEXT: 
renamable $vgpr24 = COPY killed renamable $sgpr8 - ; GCN-NEXT: renamable $vgpr23 = COPY killed renamable $sgpr7 - ; GCN-NEXT: renamable $vgpr22 = COPY killed renamable $sgpr6 - ; GCN-NEXT: renamable $vgpr21 = COPY killed renamable $sgpr5 - ; GCN-NEXT: renamable $vgpr20 = COPY killed renamable $sgpr4 - ; GCN-NEXT: renamable $vgpr19 = COPY killed renamable $sgpr3 - ; GCN-NEXT: renamable $vgpr18 = COPY killed renamable $sgpr2 - ; GCN-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr1 - ; GCN-NEXT: renamable $vgpr16 = COPY killed renamable $sgpr0 - ; GCN-NEXT: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GCN-NEXT: renamable $vgpr1 = COPY killed renamable $vgpr30 - ; GCN-NEXT: renamable $vgpr2 = COPY killed renamable $vgpr29 - ; GCN-NEXT: renamable $vgpr3 = COPY killed renamable $vgpr28 - ; GCN-NEXT: renamable $vgpr4 = COPY killed renamable $vgpr27 - ; GCN-NEXT: renamable $vgpr5 = COPY killed renamable $vgpr26 - ; GCN-NEXT: renamable $vgpr6 = COPY killed renamable $vgpr25 - ; GCN-NEXT: renamable $vgpr7 = COPY killed renamable $vgpr24 - ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr23 - ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr22 - ; GCN-NEXT: renamable $vgpr10 = COPY killed renamable $vgpr21 - ; GCN-NEXT: renamable $vgpr11 = COPY killed renamable $vgpr20 - ; GCN-NEXT: renamable $vgpr12 = COPY killed renamable $vgpr19 - ; GCN-NEXT: renamable $vgpr13 = COPY killed renamable $vgpr18 - ; GCN-NEXT: renamable $vgpr14 = COPY killed renamable $vgpr17 - ; GCN-NEXT: renamable $vgpr15 = COPY killed renamable $vgpr16 + ; GCN-NEXT: renamable $vgpr0 = PRED_COPY killed renamable $sgpr15 + ; GCN-NEXT: renamable $vgpr30 = PRED_COPY killed renamable $sgpr14 + ; GCN-NEXT: renamable $vgpr29 = PRED_COPY killed renamable $sgpr13 + ; GCN-NEXT: renamable $vgpr28 = PRED_COPY killed renamable $sgpr12 + ; GCN-NEXT: renamable $vgpr27 = PRED_COPY killed renamable $sgpr11 + ; GCN-NEXT: renamable $vgpr26 = PRED_COPY killed renamable $sgpr10 + ; GCN-NEXT: renamable $vgpr25 = PRED_COPY killed renamable $sgpr9 + ; GCN-NEXT: renamable $vgpr24 = PRED_COPY killed renamable $sgpr8 + ; GCN-NEXT: renamable $vgpr23 = PRED_COPY killed renamable $sgpr7 + ; GCN-NEXT: renamable $vgpr22 = PRED_COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $vgpr21 = PRED_COPY killed renamable $sgpr5 + ; GCN-NEXT: renamable $vgpr20 = PRED_COPY killed renamable $sgpr4 + ; GCN-NEXT: renamable $vgpr19 = PRED_COPY killed renamable $sgpr3 + ; GCN-NEXT: renamable $vgpr18 = PRED_COPY killed renamable $sgpr2 + ; GCN-NEXT: renamable $vgpr17 = PRED_COPY killed renamable $sgpr1 + ; GCN-NEXT: renamable $vgpr16 = PRED_COPY killed renamable $sgpr0 + ; GCN-NEXT: undef renamable $vgpr0 = PRED_COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN-NEXT: renamable $vgpr1 = PRED_COPY killed renamable $vgpr30 + ; GCN-NEXT: renamable $vgpr2 = PRED_COPY killed renamable $vgpr29 + ; GCN-NEXT: renamable $vgpr3 = PRED_COPY killed renamable $vgpr28 + ; GCN-NEXT: renamable $vgpr4 = PRED_COPY killed renamable $vgpr27 + ; GCN-NEXT: renamable $vgpr5 = PRED_COPY killed renamable $vgpr26 + ; GCN-NEXT: renamable $vgpr6 = PRED_COPY killed renamable $vgpr25 + ; GCN-NEXT: renamable $vgpr7 = PRED_COPY killed renamable $vgpr24 + ; GCN-NEXT: renamable $vgpr8 = PRED_COPY killed renamable $vgpr23 + ; GCN-NEXT: renamable $vgpr9 = 
PRED_COPY killed renamable $vgpr22 + ; GCN-NEXT: renamable $vgpr10 = PRED_COPY killed renamable $vgpr21 + ; GCN-NEXT: renamable $vgpr11 = PRED_COPY killed renamable $vgpr20 + ; GCN-NEXT: renamable $vgpr12 = PRED_COPY killed renamable $vgpr19 + ; GCN-NEXT: renamable $vgpr13 = PRED_COPY killed renamable $vgpr18 + ; GCN-NEXT: renamable $vgpr14 = PRED_COPY killed renamable $vgpr17 + ; GCN-NEXT: renamable $vgpr15 = PRED_COPY killed renamable $vgpr16 ; GCN-NEXT: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -91,7 +91,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN-NEXT: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec ; GCN-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 + ; GCN-NEXT: renamable $sgpr2_sgpr3 = PRED_COPY renamable $sgpr0_sgpr1 ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll index 36053945e1341..dd645fa1eed31 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -12,30 +12,48 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s7 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_mov_b64 s[4:5], 0 -; CHECK-NEXT: s_load_dword s7, s[4:5], 0x0 +; CHECK-NEXT: s_mov_b32 s33, s6 +; CHECK-NEXT: v_mov_b32_e32 v31, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp1_b32 s4, 0 +; CHECK-NEXT: s_cselect_b64 vcc, -1, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 -; CHECK-NEXT: s_getpc_b64 s[8:9] -; CHECK-NEXT: s_add_u32 s8, s8, snork@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s9, s9, snork@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 -; CHECK-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, snork@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, snork@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[4:5], 0x0 +; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], exec ; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_and_b32 s4, 1, s7 -; CHECK-NEXT: s_cmp_eq_u32 s4, 1 -; CHECK-NEXT: v_mov_b32_e32 v31, v0 -; CHECK-NEXT: s_cselect_b32 s5, s13, s11 -; CHECK-NEXT: s_cselect_b32 s4, s12, s10 -; CHECK-NEXT: s_mov_b32 s12, s6 +; CHECK-NEXT: v_mov_b32_e32 v0, s9 +; CHECK-NEXT: v_mov_b32_e32 v1, s11 +; CHECK-NEXT: v_mov_b32_e32 v2, s8 +; CHECK-NEXT: v_mov_b32_e32 v4, s10 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_readfirstlane_b32 s4, v2 +; CHECK-NEXT: v_readfirstlane_b32 s5, v3 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] +; CHECK-NEXT: s_and_saveexec_b64 s[34:35], vcc +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: s_mov_b32 s12, s33 +; CHECK-NEXT: v_mov_b32_e32 v4, v1 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CHECK-NEXT: ; implicit-def: $vgpr31 +; CHECK-NEXT: ; implicit-def: $vgpr1 +; CHECK-NEXT: s_xor_b64 exec, exec, s[34:35] +; CHECK-NEXT: s_cbranch_execnz .LBB0_1 +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 46bbfb688ab5a..da84f0dc6a589 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4 @@ -393,11 +393,12 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-LABEL: test_indirect_call_vgpr_ptr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 18 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -464,22 +465,24 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 18 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 18 +; GISEL-NEXT: s_mov_b32 s16, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 +; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL-NEXT: s_mov_b64 exec, s[18:19] +; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -546,11 +549,12 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b32 s33, s4 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr() @@ -561,11 +565,12 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 18 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -635,22 +640,24 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 18 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 18 +; GISEL-NEXT: s_mov_b32 s16, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 +; 
GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL-NEXT: s_mov_b64 exec, s[18:19] +; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -718,11 +725,12 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b32 s33, s4 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr(i32 123) @@ -733,11 +741,12 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 18 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -806,22 +815,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 18 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 18 +; GISEL-NEXT: s_mov_b32 s16, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 +; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL-NEXT: s_mov_b64 exec, s[18:19] +; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -890,11 +901,12 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 
s4, v40, 18 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b32 s33, s4 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] %a = call i32 %fptr() @@ -906,11 +918,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 20 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 20 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -988,22 +1001,24 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 20 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 20 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: ; GISEL: ; %bb.0: ; %bb0 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 20 +; GISEL-NEXT: s_mov_b32 s16, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 +; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL-NEXT: s_mov_b64 exec, s[18:19] +; GISEL-NEXT: v_writelane_b32 v40, s16, 20 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -1081,11 +1096,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 20 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 20 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b32 s33, s4 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; 
GISEL-NEXT: s_setpc_b64 s[30:31] bb0: @@ -1103,11 +1119,11 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 +; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -1186,22 +1202,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 +; GISEL-NEXT: s_mov_b32 s5, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -1280,11 +1296,11 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b32 s33, s5 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 inreg 123) @@ -1295,11 +1311,11 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s10, s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 -; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v40, s34, 0 @@ -1382,22 +1398,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_addk_i32 s32, 0xfc00 +; GCN-NEXT: s_mov_b32 s33, s10 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, s33 +; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 -; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 @@ -1480,11 +1496,11 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL-NEXT: s_mov_b32 s33, s10 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 %i) @@ -1499,11 +1515,11 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s10, s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 -; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -1584,22 +1600,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_addk_i32 s32, 
0xfc00 +; GCN-NEXT: s_mov_b32 s33, s10 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, s33 +; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 -; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -1680,11 +1696,11 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL-NEXT: s_mov_b32 s33, s10 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] %ret = call amdgpu_gfx i32 %fptr(i32 %i) @@ -1696,11 +1712,11 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GCN-LABEL: test_indirect_tail_call_vgpr_ptr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s10, s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 -; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 @@ -1778,22 +1794,22 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_addk_i32 s32, 0xfc00 +; GCN-NEXT: s_mov_b32 s33, s10 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, s33 +; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 -; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 @@ -1871,13 +1887,16 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: 
v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL-NEXT: s_mov_b32 s33, s10 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] tail call amdgpu_gfx void %fptr() ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index 23991e875ae48..0c4b937b54034 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -9,14 +9,14 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_128 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -27,14 +27,14 @@ define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_128_align2 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:VReg_128_Align2 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -45,14 +45,14 @@ define amdgpu_kernel void @a_input_output_i128() { 
; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:AReg_128 */, def %4 - ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128 = PRED_COPY %4 + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[PRED_COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128_Align2 */, def %4 - ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128_align2 = PRED_COPY %4 + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128_Align2 */, [[PRED_COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index f27c6800c69e4..a0bf023ba1e6f 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -586,18 +586,21 @@ define amdgpu_kernel void @double2_inselt(<2 x double> addrspace(1)* %out, <2 x ; GCN-NEXT: s_load_dword s2, s[0:1], 0x44 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 0x3ff00000 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_eq_u32 s2, 1 -; GCN-NEXT: s_cselect_b32 s3, 0x3ff00000, s7 -; GCN-NEXT: s_cselect_b32 s6, 0, s6 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s6 ; GCN-NEXT: s_cmp_eq_u32 s2, 0 -; GCN-NEXT: s_cselect_b32 s2, 0x3ff00000, s5 -; GCN-NEXT: s_cselect_b32 s4, 0, s4 -; GCN-NEXT: v_mov_b32_e32 v5, s1 +; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc ; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s2 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: v_mov_b32_e32 v5, s1 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm @@ -614,45 +617,51 @@ define amdgpu_kernel void @double5_inselt(<5 x double> addrspace(1)* %out, <5 x ; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x84 ; GCN-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x24 ; GCN-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x64 +; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_eq_u32 s12, 4 -; GCN-NEXT: s_cselect_b32 s9, 0x3ff00000, s9 -; GCN-NEXT: s_cselect_b32 s8, 0, s8 +; GCN-NEXT: v_mov_b32_e32 v0, s9 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v9, v0, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s8 ; GCN-NEXT: s_cmp_eq_u32 s12, 1 -; GCN-NEXT: s_cselect_b32 s3, 0x3ff00000, s3 -; GCN-NEXT: s_cselect_b32 s2, 0, s2 +; GCN-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: 
v_cndmask_b32_e32 v3, v0, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s2 ; GCN-NEXT: s_cmp_eq_u32 s12, 0 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_mov_b32_e32 v5, s9 -; GCN-NEXT: s_cselect_b32 s8, 0x3ff00000, s1 -; GCN-NEXT: s_cselect_b32 s9, 0, s0 +; GCN-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s1 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: s_cmp_eq_u32 s12, 3 -; GCN-NEXT: s_cselect_b32 s0, 0x3ff00000, s7 -; GCN-NEXT: s_cselect_b32 s1, 0, s6 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GCN-NEXT: v_mov_b32_e32 v5, s7 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v5, s6 ; GCN-NEXT: s_cmp_eq_u32 s12, 2 -; GCN-NEXT: s_cselect_b32 s5, 0x3ff00000, s5 -; GCN-NEXT: s_cselect_b32 s4, 0, s4 -; GCN-NEXT: v_mov_b32_e32 v3, s0 +; GCN-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_add_u32 s0, s10, 16 -; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: s_addc_u32 s1, s11, 0 -; GCN-NEXT: v_mov_b32_e32 v7, s1 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: v_mov_b32_e32 v6, s0 -; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] -; GCN-NEXT: v_mov_b32_e32 v6, s10 -; GCN-NEXT: v_mov_b32_e32 v0, s9 -; GCN-NEXT: v_mov_b32_e32 v1, s8 -; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v3, s3 -; GCN-NEXT: v_mov_b32_e32 v7, s11 +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v4, s4 +; GCN-NEXT: v_mov_b32_e32 v11, s1 +; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GCN-NEXT: v_mov_b32_e32 v10, s0 +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] ; GCN-NEXT: s_add_u32 s0, s10, 32 -; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] +; GCN-NEXT: v_mov_b32_e32 v4, s10 +; GCN-NEXT: v_mov_b32_e32 v5, s11 +; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_addc_u32 s1, s11, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; GCN-NEXT: flat_store_dwordx2 v[0:1], v[8:9] ; GCN-NEXT: s_endpgm entry: %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index f1b4399fad099..4d52d1e33da0a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -685,14 +685,16 @@ define amdgpu_kernel void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* % ; SI-NEXT: s_mov_b32 s7, 0x100f000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: v_mov_b32_e32 v0, s3 ; SI-NEXT: s_cmp_lg_u32 s8, 1 -; SI-NEXT: s_cselect_b32 s0, s3, 5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s8, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_cselect_b32 s1, s2, 5 -; SI-NEXT: v_mov_b32_e32 v0, s1 -; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -728,14 +730,17 @@ define amdgpu_kernel void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* % ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s8, 2 -; SI-NEXT: s_cselect_b32 s2, s2, 5 +; SI-NEXT: v_mov_b32_e32 
v0, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s8, 1 -; SI-NEXT: s_cselect_b32 s1, s1, 5 +; SI-NEXT: v_cndmask_b32_e32 v2, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s8, 0 -; SI-NEXT: s_cselect_b32 s0, s0, 5 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v0, vcc ; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_mov_b32_e32 v2, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc ; SI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -766,26 +771,31 @@ define amdgpu_kernel void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* % define amdgpu_kernel void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b, [8 x i32], i32 %val) nounwind { ; SI-LABEL: dynamic_insertelement_v4i32: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 -; SI-NEXT: s_load_dword s8, s[4:5], 0x8 -; SI-NEXT: s_load_dword s9, s[4:5], 0x11 -; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; SI-NEXT: s_mov_b32 s7, 0x100f000 -; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_load_dword s6, s[4:5], 0x8 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x4 +; SI-NEXT: s_load_dword s4, s[4:5], 0x11 +; SI-NEXT: s_mov_b32 s3, 0x100f000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_cmp_eq_u32 s8, 3 -; SI-NEXT: s_cselect_b32 s3, s9, s3 -; SI-NEXT: s_cmp_eq_u32 s8, 2 -; SI-NEXT: s_cselect_b32 s2, s9, s2 -; SI-NEXT: s_cmp_eq_u32 s8, 1 -; SI-NEXT: s_cselect_b32 s1, s9, s1 -; SI-NEXT: s_cmp_eq_u32 s8, 0 -; SI-NEXT: s_cselect_b32 s0, s9, s0 -; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_mov_b32_e32 v2, s2 -; SI-NEXT: v_mov_b32_e32 v3, s3 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_cmp_eq_u32 s6, 3 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_mov_b32_e32 v0, s11 +; SI-NEXT: v_mov_b32_e32 v4, s4 +; SI-NEXT: s_cmp_eq_u32 s6, 2 +; SI-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v0, s10 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_eq_u32 s6, 1 +; SI-NEXT: v_cndmask_b32_e32 v2, v0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v0, s9 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_eq_u32 s6, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v0, s8 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: dynamic_insertelement_v4i32: @@ -1212,88 +1222,116 @@ define amdgpu_kernel void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* % ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_lshr_b32 s4, s11, 24 ; SI-NEXT: s_cmp_lg_u32 s6, 15 -; SI-NEXT: s_cselect_b32 s4, s4, 5 -; SI-NEXT: s_lshl_b32 s4, s4, 8 -; SI-NEXT: s_lshr_b32 s5, s11, 16 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_lshr_b32 s4, s11, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 14 -; SI-NEXT: s_cselect_b32 s5, s5, 5 -; SI-NEXT: s_and_b32 s5, s5, 0xff -; SI-NEXT: s_or_b32 s4, s5, s4 -; SI-NEXT: s_lshl_b32 s4, s4, 16 -; SI-NEXT: s_lshr_b32 s5, s11, 8 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: s_lshr_b32 s4, s11, 8 +; SI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 13 -; SI-NEXT: s_cselect_b32 
s5, s5, 5 -; SI-NEXT: s_lshl_b32 s5, s5, 8 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s6, 12 -; SI-NEXT: s_cselect_b32 s7, s11, 5 -; SI-NEXT: s_and_b32 s7, s7, 0xff -; SI-NEXT: s_or_b32 s5, s7, s5 -; SI-NEXT: s_and_b32 s5, s5, 0xffff -; SI-NEXT: s_or_b32 s4, s5, s4 -; SI-NEXT: s_lshr_b32 s5, s10, 24 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: v_mov_b32_e32 v2, s11 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v2, 5, v2, vcc +; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; SI-NEXT: v_and_b32_e32 v2, 0xff, v2 +; SI-NEXT: v_or_b32_e32 v1, v2, v1 +; SI-NEXT: s_lshr_b32 s4, s10, 24 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 11 -; SI-NEXT: s_cselect_b32 s5, s5, 5 -; SI-NEXT: s_lshl_b32 s5, s5, 8 -; SI-NEXT: s_lshr_b32 s7, s10, 16 +; SI-NEXT: v_or_b32_e32 v3, v1, v0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_lshr_b32 s4, s10, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 10 -; SI-NEXT: s_cselect_b32 s7, s7, 5 -; SI-NEXT: s_and_b32 s7, s7, 0xff -; SI-NEXT: s_or_b32 s5, s7, s5 -; SI-NEXT: s_lshl_b32 s5, s5, 16 -; SI-NEXT: s_lshr_b32 s7, s10, 8 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: s_lshr_b32 s4, s10, 8 +; SI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 9 -; SI-NEXT: s_cselect_b32 s7, s7, 5 -; SI-NEXT: s_lshl_b32 s7, s7, 8 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s6, 8 -; SI-NEXT: s_cselect_b32 s10, s10, 5 -; SI-NEXT: s_and_b32 s10, s10, 0xff -; SI-NEXT: s_or_b32 s7, s10, s7 -; SI-NEXT: s_and_b32 s7, s7, 0xffff -; SI-NEXT: s_or_b32 s5, s7, s5 -; SI-NEXT: s_lshr_b32 s7, s9, 24 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: v_mov_b32_e32 v2, s10 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v2, 5, v2, vcc +; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; SI-NEXT: v_and_b32_e32 v2, 0xff, v2 +; SI-NEXT: v_or_b32_e32 v1, v2, v1 +; SI-NEXT: s_lshr_b32 s4, s9, 24 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 7 -; SI-NEXT: s_cselect_b32 s7, s7, 5 -; SI-NEXT: s_lshl_b32 s7, s7, 8 -; SI-NEXT: s_lshr_b32 s10, s9, 16 +; SI-NEXT: v_or_b32_e32 v2, v1, v0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_lshr_b32 s4, s9, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 6 -; SI-NEXT: s_cselect_b32 s10, s10, 5 -; SI-NEXT: s_and_b32 s10, s10, 0xff -; SI-NEXT: s_or_b32 s7, s10, s7 -; SI-NEXT: s_lshl_b32 s7, s7, 16 -; SI-NEXT: s_lshr_b32 s10, s9, 8 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: s_lshr_b32 s4, s9, 8 +; SI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 5 -; SI-NEXT: s_cselect_b32 s10, s10, 5 -; SI-NEXT: s_lshl_b32 s10, s10, 8 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s6, 4 -; SI-NEXT: s_cselect_b32 s9, s9, 5 -; SI-NEXT: s_and_b32 s9, s9, 0xff -; SI-NEXT: s_or_b32 s9, s9, s10 -; SI-NEXT: s_and_b32 s9, s9, 0xffff -; SI-NEXT: s_or_b32 s7, s9, s7 -; SI-NEXT: 
s_lshr_b32 s9, s8, 24 +; SI-NEXT: v_cndmask_b32_e32 v1, 5, v1, vcc +; SI-NEXT: v_mov_b32_e32 v4, s9 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, 5, v4, vcc +; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; SI-NEXT: v_and_b32_e32 v4, 0xff, v4 +; SI-NEXT: v_or_b32_e32 v1, v4, v1 +; SI-NEXT: s_lshr_b32 s4, s8, 24 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_cmp_lg_u32 s6, 3 -; SI-NEXT: s_cselect_b32 s9, s9, 5 -; SI-NEXT: s_lshl_b32 s9, s9, 8 -; SI-NEXT: s_lshr_b32 s10, s8, 16 +; SI-NEXT: v_or_b32_e32 v1, v1, v0 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_lshr_b32 s4, s8, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 2 -; SI-NEXT: s_cselect_b32 s10, s10, 5 -; SI-NEXT: s_and_b32 s10, s10, 0xff -; SI-NEXT: s_or_b32 s9, s10, s9 -; SI-NEXT: s_lshl_b32 s9, s9, 16 -; SI-NEXT: s_lshr_b32 s10, s8, 8 +; SI-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc +; SI-NEXT: v_mov_b32_e32 v4, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, 5, v4, vcc +; SI-NEXT: s_lshr_b32 s4, s8, 8 +; SI-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; SI-NEXT: v_and_b32_e32 v4, 0xff, v4 ; SI-NEXT: s_cmp_lg_u32 s6, 1 -; SI-NEXT: s_cselect_b32 s10, s10, 5 -; SI-NEXT: s_lshl_b32 s10, s10, 8 +; SI-NEXT: v_or_b32_e32 v0, v4, v0 +; SI-NEXT: v_mov_b32_e32 v4, s4 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lg_u32 s6, 0 -; SI-NEXT: s_cselect_b32 s6, s8, 5 -; SI-NEXT: s_and_b32 s6, s6, 0xff -; SI-NEXT: s_or_b32 s6, s6, s10 -; SI-NEXT: s_and_b32 s6, s6, 0xffff -; SI-NEXT: s_or_b32 s6, s6, s9 -; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: v_mov_b32_e32 v2, s5 -; SI-NEXT: v_mov_b32_e32 v3, s4 +; SI-NEXT: v_cndmask_b32_e32 v4, 5, v4, vcc +; SI-NEXT: v_mov_b32_e32 v5, s8 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v5, 5, v5, vcc +; SI-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; SI-NEXT: v_and_b32_e32 v5, 0xff, v5 +; SI-NEXT: v_or_b32_e32 v4, v5, v4 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; SI-NEXT: v_or_b32_e32 v0, v4, v0 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -1489,19 +1527,22 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; SI-NEXT: s_load_dword s8, s[4:5], 0x18 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xc ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; SI-NEXT: v_mov_b32_e32 v1, 0x40200000 ; SI-NEXT: s_mov_b32 s7, 0x100f000 -; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_eq_u32 s8, 1 -; SI-NEXT: s_cselect_b32 s3, 0x40200000, s3 -; SI-NEXT: s_cselect_b32 s2, 0, s2 +; SI-NEXT: v_mov_b32_e32 v0, s3 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v0, s2 ; SI-NEXT: s_cmp_eq_u32 s8, 0 -; SI-NEXT: s_cselect_b32 s1, 0x40200000, s1 -; SI-NEXT: s_cselect_b32 s0, 0, s0 +; SI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc ; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_mov_b32_e32 v2, s2 -; SI-NEXT: v_mov_b32_e32 v3, s3 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -1510,19 +1551,22 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) ; VI-NEXT: s_load_dword s8, s[4:5], 0x60 ; VI-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x30 ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; VI-NEXT: v_mov_b32_e32 v1, 0x40200000 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 -; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_eq_u32 s8, 1 -; VI-NEXT: s_cselect_b32 s3, 0x40200000, s3 -; VI-NEXT: s_cselect_b32 s2, 0, s2 +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: s_cmp_eq_u32 s8, 0 -; VI-NEXT: s_cselect_b32 s1, 0x40200000, s1 -; VI-NEXT: s_cselect_b32 s0, 0, s0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v3, s3 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <2 x double> %a, double 8.0, i32 %b @@ -1533,43 +1577,47 @@ define amdgpu_kernel void @dynamic_insertelement_v2f64(<2 x double> addrspace(1) define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind { ; SI-LABEL: dynamic_insertelement_v2i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dword s8, s[4:5], 0x8 +; SI-NEXT: s_load_dword s10, s[4:5], 0x8 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4 ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; SI-NEXT: s_mov_b32 s7, 0x100f000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_cmp_eq_u32 s8, 1 -; SI-NEXT: s_cselect_b32 s3, 0, s3 -; SI-NEXT: s_cselect_b32 s2, 5, s2 -; SI-NEXT: s_cmp_eq_u32 s8, 0 -; SI-NEXT: s_cselect_b32 s1, 0, s1 -; SI-NEXT: s_cselect_b32 s0, 5, s0 +; SI-NEXT: s_cmp_eq_u32 s10, 1 +; SI-NEXT: v_mov_b32_e32 v0, s3 +; SI-NEXT: s_cselect_b64 s[8:9], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9] +; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: s_cmp_eq_u32 s10, 0 +; SI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[8:9] +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[2:3] ; SI-NEXT: v_mov_b32_e32 v0, s0 -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_mov_b32_e32 v2, s2 -; SI-NEXT: v_mov_b32_e32 v3, s3 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[2:3] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: dynamic_insertelement_v2i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s8, s[4:5], 0x20 +; VI-NEXT: s_load_dword s10, s[4:5], 0x20 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_cmp_eq_u32 s8, 1 -; VI-NEXT: s_cselect_b32 s3, 0, s3 -; VI-NEXT: s_cselect_b32 s2, 5, s2 -; VI-NEXT: s_cmp_eq_u32 s8, 0 -; VI-NEXT: s_cselect_b32 s1, 0, s1 -; VI-NEXT: s_cselect_b32 s0, 5, s0 +; VI-NEXT: s_cmp_eq_u32 s10, 1 +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: s_cselect_b64 s[8:9], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[8:9] +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: s_cmp_eq_u32 s10, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[8:9] +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[2:3] ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v3, s3 +; VI-NEXT: v_cndmask_b32_e64 v0, 
v0, 5, s[2:3] ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <2 x i64> %a, i64 5, i32 %b @@ -1580,57 +1628,63 @@ define amdgpu_kernel void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* % define amdgpu_kernel void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %a, i32 %b) nounwind { ; SI-LABEL: dynamic_insertelement_v3i64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dword s6, s[4:5], 0x10 +; SI-NEXT: s_load_dword s12, s[4:5], 0x10 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8 ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xc ; SI-NEXT: s_mov_b32 s3, 0x100f000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_cmp_eq_u32 s6, 1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_cselect_b32 s7, 0, s11 -; SI-NEXT: s_cselect_b32 s10, 5, s10 -; SI-NEXT: s_cmp_eq_u32 s6, 0 -; SI-NEXT: s_cselect_b32 s9, 0, s9 -; SI-NEXT: s_cselect_b32 s8, 5, s8 -; SI-NEXT: s_cmp_eq_u32 s6, 2 -; SI-NEXT: s_cselect_b32 s5, 0, s5 -; SI-NEXT: s_cselect_b32 s4, 5, s4 -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16 +; SI-NEXT: s_cmp_eq_u32 s12, 1 +; SI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; SI-NEXT: v_mov_b32_e32 v0, s11 +; SI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[6:7] +; SI-NEXT: v_mov_b32_e32 v0, s10 +; SI-NEXT: s_cmp_eq_u32 s12, 0 +; SI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[6:7] +; SI-NEXT: v_mov_b32_e32 v0, s9 +; SI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[6:7] ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: v_mov_b32_e32 v2, s10 -; SI-NEXT: v_mov_b32_e32 v3, s7 +; SI-NEXT: s_cmp_eq_u32 s12, 2 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 5, s[6:7] +; SI-NEXT: v_mov_b32_e32 v4, s5 +; SI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[6:7] +; SI-NEXT: v_mov_b32_e32 v4, s4 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 5, s[6:7] +; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:16 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: dynamic_insertelement_v3i64: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s6, s[4:5], 0x40 +; VI-NEXT: s_load_dword s12, s[4:5], 0x40 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x20 ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x30 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_cmp_eq_u32 s6, 1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_cselect_b32 s7, 0, s11 -; VI-NEXT: s_cselect_b32 s10, 5, s10 -; VI-NEXT: s_cmp_eq_u32 s6, 0 -; VI-NEXT: s_cselect_b32 s9, 0, s9 -; VI-NEXT: s_cselect_b32 s8, 5, s8 -; VI-NEXT: s_cmp_eq_u32 s6, 2 -; VI-NEXT: s_cselect_b32 s5, 0, s5 -; VI-NEXT: s_cselect_b32 s4, 5, s4 -; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16 +; VI-NEXT: s_cmp_eq_u32 s12, 1 +; VI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[6:7] +; VI-NEXT: v_mov_b32_e32 v0, s10 +; VI-NEXT: s_cmp_eq_u32 s12, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 5, s[6:7] +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[6:7] ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: v_mov_b32_e32 v2, s10 -; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_cmp_eq_u32 s12, 2 +; VI-NEXT: 
v_cndmask_b32_e64 v0, v0, 5, s[6:7] +; VI-NEXT: v_mov_b32_e32 v4, s5 +; VI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v5, v4, 0, s[6:7] +; VI-NEXT: v_mov_b32_e32 v4, s4 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_cndmask_b32_e64 v4, v4, 5, s[6:7] +; VI-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <3 x i64> %a, i64 5, i32 %b @@ -1643,32 +1697,36 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1) ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s6, s[4:5], 0x10 ; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8 +; SI-NEXT: v_mov_b32_e32 v4, 0x40200000 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; SI-NEXT: s_mov_b32 s3, 0x100f000 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_eq_u32 s6, 1 -; SI-NEXT: s_cselect_b32 s4, 0x40200000, s11 -; SI-NEXT: s_cselect_b32 s5, 0, s10 +; SI-NEXT: v_mov_b32_e32 v0, s11 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v0, s10 ; SI-NEXT: s_cmp_eq_u32 s6, 0 -; SI-NEXT: s_cselect_b32 s7, 0x40200000, s9 -; SI-NEXT: s_cselect_b32 s8, 0, s8 +; SI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v0, s9 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: s_cmp_eq_u32 s6, 3 -; SI-NEXT: s_cselect_b32 s9, 0x40200000, s15 -; SI-NEXT: s_cselect_b32 s10, 0, s14 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v5, s15 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc +; SI-NEXT: v_mov_b32_e32 v5, s14 ; SI-NEXT: s_cmp_eq_u32 s6, 2 -; SI-NEXT: s_cselect_b32 s6, 0x40200000, s13 -; SI-NEXT: s_cselect_b32 s11, 0, s12 -; SI-NEXT: v_mov_b32_e32 v0, s11 -; SI-NEXT: v_mov_b32_e32 v1, s6 -; SI-NEXT: v_mov_b32_e32 v2, s10 -; SI-NEXT: v_mov_b32_e32 v3, s9 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; SI-NEXT: s_nop 0 -; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: v_mov_b32_e32 v2, s5 -; SI-NEXT: v_mov_b32_e32 v3, s4 +; SI-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc +; SI-NEXT: v_mov_b32_e32 v5, s13 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc +; SI-NEXT: v_mov_b32_e32 v4, s12 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -1676,32 +1734,36 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(<4 x double> addrspace(1) ; VI: ; %bb.0: ; VI-NEXT: s_load_dword s6, s[4:5], 0x40 ; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 +; VI-NEXT: v_mov_b32_e32 v4, 0x40200000 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_eq_u32 s6, 1 -; VI-NEXT: s_cselect_b32 s4, 0x40200000, s11 -; VI-NEXT: s_cselect_b32 s5, 0, s10 +; VI-NEXT: v_mov_b32_e32 v0, s11 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v3, v0, v4, vcc +; VI-NEXT: v_mov_b32_e32 v0, s10 ; VI-NEXT: s_cmp_eq_u32 s6, 0 -; VI-NEXT: s_cselect_b32 s7, 0x40200000, s9 -; VI-NEXT: s_cselect_b32 s8, 0, s8 +; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, vcc +; VI-NEXT: v_mov_b32_e32 v0, s9 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v1, v0, v4, vcc +; VI-NEXT: 
v_mov_b32_e32 v0, s8 ; VI-NEXT: s_cmp_eq_u32 s6, 3 -; VI-NEXT: s_cselect_b32 s9, 0x40200000, s15 -; VI-NEXT: s_cselect_b32 s10, 0, s14 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_mov_b32_e32 v5, s15 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v7, v5, v4, vcc +; VI-NEXT: v_mov_b32_e32 v5, s14 ; VI-NEXT: s_cmp_eq_u32 s6, 2 -; VI-NEXT: s_cselect_b32 s6, 0x40200000, s13 -; VI-NEXT: s_cselect_b32 s11, 0, s12 -; VI-NEXT: v_mov_b32_e32 v0, s11 -; VI-NEXT: v_mov_b32_e32 v1, s6 -; VI-NEXT: v_mov_b32_e32 v2, s10 -; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; VI-NEXT: s_nop 0 -; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: v_mov_b32_e32 v2, s5 -; VI-NEXT: v_mov_b32_e32 v3, s4 +; VI-NEXT: v_cndmask_b32_e64 v6, v5, 0, vcc +; VI-NEXT: v_mov_b32_e32 v5, s13 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc +; VI-NEXT: v_mov_b32_e32 v4, s12 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_endpgm %vecins = insertelement <4 x double> %a, double 8.0, i32 %b diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll index fd195f9b790eb..8d23a43125e67 100644 --- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=DOORBELL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=DOORBELL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s declare void @llvm.trap() #0 @@ -27,3 +27,6 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { store volatile i32 2, i32 addrspace(1)* %arg0 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll index 5614a9360c923..ad906d2e01120 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HSA-VI,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HSA-VI,FUNC %s ; Repeat of some problematic tests in kernel-args.ll, with the IR ; argument lowering pass disabled. 
Struct padding needs to be @@ -276,3 +276,6 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre store i32 %in, i32 addrspace(1)* undef, align 4 ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir new file mode 100644 index 0000000000000..0d74647e36068 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s + +# Compiler used to assert when voffset field is enabled in the MUBUF instruction for a VGPR spill inside a kernel body +# when the frame pointer is enabled. That limitation is now removed and this test should compile without any crash. + +--- | + define amdgpu_kernel void @kernel_vgpr32_spill() #0 { + ret void + } + + attributes #0 = { "frame-pointer"="all"} +... +--- +name: kernel_vgpr32_spill +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 8192, alignment: 8 } + - { id: 1, type: spill-slot, offset: 0, size: 4, alignment: 4 } + +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } +body: | + ; CHECK-LABEL: name: kernel_vgpr32_spill + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: $sgpr33 = S_MOV_B32 0 + ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc + ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0 + S_CMP_EQ_U32 0, 0, implicit-def $scc + SI_SPILL_V32_SAVE $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, align 4, addrspace 5) + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_NOP 0 + + bb.2: + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll new file mode 100644 index 0000000000000..a9cf62f412505 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs %s -o - | FileCheck %s + +; The forced spill to preserve the scratch VGPR requires the voffset to hold the large offset +; value in the MUBUF instruction being emitted before s_cbranch_scc1, as it clobbers the SCC.
+ +define amdgpu_kernel void @test_kernel(i32 %val) #0 { +; CHECK-LABEL: test_kernel: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s32, 0x180000 +; CHECK-NEXT: s_mov_b32 s33, 0 +; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; CHECK-NEXT: s_add_u32 s0, s0, s15 +; CHECK-NEXT: s_addc_u32 s1, s1, 0 +; CHECK-NEXT: ; implicit-def: $vgpr3 +; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v3, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, v1 +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: s_add_i32 s18, s33, 0x100200 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s18 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: s_load_dword s8, s[6:7], 0x0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_writelane_b32 v0, s8, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: s_add_i32 s18, s33, 0x100200 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s18 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def vgpr10 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_add_i32 s36, s33, 0x100100 +; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s36 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 s[16:17], 8 +; CHECK-NEXT: s_mov_b32 s8, s6 +; CHECK-NEXT: s_mov_b32 s6, s7 +; CHECK-NEXT: s_mov_b32 s9, s16 +; CHECK-NEXT: s_mov_b32 s7, s17 +; CHECK-NEXT: s_add_u32 s8, s8, s9 +; CHECK-NEXT: s_addc_u32 s6, s6, s7 +; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 +; CHECK-NEXT: s_mov_b32 s9, s6 +; CHECK-NEXT: v_mov_b32_e32 v0, 0x2000 +; CHECK-NEXT: ; implicit-def: $sgpr6 +; CHECK-NEXT: ; implicit-def: $sgpr6 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, device_func@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, device_func@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[16:17], s[6:7], 0x0 +; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3] +; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1] +; CHECK-NEXT: s_mov_b32 s6, 20 +; CHECK-NEXT: v_lshlrev_b32_e64 v3, s6, v3 +; CHECK-NEXT: s_mov_b32 s6, 10 +; CHECK-NEXT: v_lshlrev_b32_e64 v2, s6, v2 +; CHECK-NEXT: v_or3_b32 v31, v1, v2, v3 +; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 +; CHECK-NEXT: ; implicit-def: $sgpr15 +; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] +; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: s_add_i32 s6, s33, 0x100200 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readlane_b32 s4, v0, 0 +; CHECK-NEXT: s_add_i32 s6, s33, 0x100100 +; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s6 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_cmp_eq_u32 s4, s5 +; CHECK-NEXT: v_mov_b32_e32 v1, 0x4000 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_store_dword v10, v1, s[0:3], s33 offen ; 4-byte Folded Spill +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %store +; CHECK-NEXT: s_add_i32 s5, s33, 0x100000 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; CHECK-NEXT: ; implicit-def: $sgpr4 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_write_b32 v0, v1 +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: s_add_i32 s4, s33, 0x100200 +; CHECK-NEXT: buffer_load_dword v0, 
off, s[0:3], s4 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_2: ; %end +; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 +; CHECK-NEXT: s_add_i32 s4, s33, 0x100200 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: s_endpgm + %arr = alloca < 1339 x i32>, align 8192, addrspace(5) + %cmp = icmp ne i32 %val, 0 + %vreg = call i32 asm sideeffect "; def vgpr10", "={v10}"() + call void @device_func(<1339 x i32> addrspace(5)* %arr) + br i1 %cmp, label %store, label %end + +store: + store volatile i32 %vreg, i32 addrspace(3)* undef + ret void + +end: + ret void +} + +declare void @device_func(<1339 x i32> addrspace(5)*) + +attributes #0 = { nounwind "frame-pointer"="all" } diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll index 66fb529326edf..2f16fa0e69734 100644 --- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck --check-prefixes=GCN,CI,ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,VI,ALL %s ; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck --check-prefixes=GCN,GFX9,ALL %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=4 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCNHSA,ALL %s ; FIXME: align on alloca seems to be ignored for private_segment_alignment @@ -67,3 +67,6 @@ define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { } attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll index f00f59b2ffc2b..77bc16d7312e7 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=HSA %s @lds.align16.0 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16 @lds.align16.1 = internal unnamed_addr addrspace(3) global [38 x i8] undef, align 16 @@ -255,3 +255,6 @@ define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 ad attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } attributes #2 = { 
convergent nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll index 66e2bfaeeb444..7e94b28c5acfc 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll @@ -59,19 +59,20 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) #1 { define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) { ; CHECK-LABEL: module_1_kernel_normal_extern_normal: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s8, s8, s11 +; CHECK-NEXT: s_add_u32 s6, s6, s9 ; CHECK-NEXT: s_mov_b32 s32, 0 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 -; CHECK-NEXT: s_add_u32 s0, s0, s11 +; CHECK-NEXT: s_addc_u32 s7, s7, 0 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 +; CHECK-NEXT: s_add_u32 s0, s0, s9 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_getpc_b64 s[8:9] -; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 -; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0 -; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_add_u32 s8, s4, 8 +; CHECK-NEXT: s_addc_u32 s9, s5, 0 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0 +; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 @@ -117,19 +118,20 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) #1 define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) { ; CHECK-LABEL: module_1_kernel_overalign_extern_normal: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s8, s8, s11 +; CHECK-NEXT: s_add_u32 s6, s6, s9 ; CHECK-NEXT: s_mov_b32 s32, 0 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 -; CHECK-NEXT: s_add_u32 s0, s0, s11 +; CHECK-NEXT: s_addc_u32 s7, s7, 0 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 +; CHECK-NEXT: s_add_u32 s0, s0, s9 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_getpc_b64 s[8:9] -; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 -; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0 -; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_add_u32 s8, s4, 8 +; CHECK-NEXT: s_addc_u32 s9, s5, 0 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0 +; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 @@ -175,19 +177,20 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) #1 define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) { ; CHECK-LABEL: 
module_1_kernel_normal_extern_overalign: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s8, s8, s11 +; CHECK-NEXT: s_add_u32 s6, s6, s9 ; CHECK-NEXT: s_mov_b32 s32, 0 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 -; CHECK-NEXT: s_add_u32 s0, s0, s11 +; CHECK-NEXT: s_addc_u32 s7, s7, 0 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 +; CHECK-NEXT: s_add_u32 s0, s0, s9 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_getpc_b64 s[8:9] -; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 -; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0 -; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_add_u32 s8, s4, 8 +; CHECK-NEXT: s_addc_u32 s9, s5, 0 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0 +; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 @@ -233,19 +236,20 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) { ; CHECK-LABEL: module_1_kernel_overalign_extern_overalign: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s8, s8, s11 +; CHECK-NEXT: s_add_u32 s6, s6, s9 ; CHECK-NEXT: s_mov_b32 s32, 0 -; CHECK-NEXT: s_addc_u32 s9, s9, 0 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8 -; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 -; CHECK-NEXT: s_add_u32 s0, s0, s11 +; CHECK-NEXT: s_addc_u32 s7, s7, 0 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 +; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 +; CHECK-NEXT: s_add_u32 s0, s0, s9 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_getpc_b64 s[8:9] -; CHECK-NEXT: s_add_u32 s8, s8, use_module@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s9, s9, use_module@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 -; CHECK-NEXT: s_load_dword s12, s[6:7], 0x0 -; CHECK-NEXT: s_mov_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_add_u32 s8, s4, 8 +; CHECK-NEXT: s_addc_u32 s9, s5, 0 +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: s_add_u32 s6, s6, use_module@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s7, s7, use_module@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[6:7], 0x0 +; CHECK-NEXT: s_load_dword s12, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[10:11] ; CHECK-NEXT: s_lshl_b32 s4, s12, 2 diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index 540422ba45ab9..6ba0acf4337d2 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -14,10 +14,11 @@ define void @func_use_lds_global() { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: s_mov_b64 s[4:5], 0 ; GFX8-NEXT: ds_write_b32 v0, v0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7] -; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_trap 2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX9-LABEL: func_use_lds_global: @@ -37,7 +38,9 @@ define void @func_use_lds_global_constexpr_cast() { ; GFX8-LABEL: func_use_lds_global_constexpr_cast: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b64 s[0:1], s[6:7] +; GFX8-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_trap 2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll index 4a94a95f081bc..c2768deac0fca 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are @@ -33,3 +33,6 @@ else: endif: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll index 2a8026ff516c2..30e457492408b 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll @@ -12,20 +12,20 @@ define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 % ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.0 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 - ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; GCN: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 9, 0 + ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PRED_COPY]](p4), 36, 0 + ; GFX8: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; GFX8: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[PRED_COPY2]], %subreg.sub0, killed [[PRED_COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec ; GCN: SI_INIT_M0 -1, implicit-def $m0 ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 killed [[V_MOV_B32_e32_]], 40, 0, implicit $m0, implicit $exec - ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + ; GFX9: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8: BUFFER_STORE_DWORD_OFFSET killed [[DS_READ_B32_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec - ; GFX9: FLAT_STORE_DWORD killed [[COPY1]], 
killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: FLAT_STORE_DWORD killed [[PRED_COPY1]], killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 %ld = load i32, i32 addrspace(3)* %gep diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 6be238e035684..778077b2bacc0 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each ; pass. Ignore it with 'grep -v'. ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \ @@ -50,6 +51,7 @@ ; GCN-O0-NEXT: Expand vector predication intrinsics ; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O0-NEXT: Expand reduction intrinsics +; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O0-NEXT: AMDGPU Attributor ; GCN-O0-NEXT: CallGraph Construction ; GCN-O0-NEXT: Call Graph SCC Pass Manager @@ -119,7 +121,7 @@ ; GCN-O0-NEXT: Fast Register Allocator ; GCN-O0-NEXT: SI lower SGPR spill instructions ; GCN-O0-NEXT: Fast Register Allocator -; GCN-O0-NEXT: SI Fix VGPR copies +; GCN-O0-NEXT: SI Simplify Predicated Copies ; GCN-O0-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O0-NEXT: Fixup Statepoint Caller Saved ; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis @@ -223,6 +225,7 @@ ; GCN-O1-NEXT: Expand reduction intrinsics ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: TLS Variable Hoist +; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-NEXT: AMDGPU Attributor ; GCN-O1-NEXT: CallGraph Construction ; GCN-O1-NEXT: Call Graph SCC Pass Manager @@ -357,18 +360,17 @@ ; GCN-O1-NEXT: Live Register Matrix ; GCN-O1-NEXT: Greedy Register Allocator ; GCN-O1-NEXT: GCN NSA Reassign +; GCN-O1-NEXT: SI Simplify Predicated Copies ; GCN-O1-NEXT: Virtual Register Rewriter ; GCN-O1-NEXT: Stack Slot Coloring ; GCN-O1-NEXT: Machine Copy Propagation Pass ; GCN-O1-NEXT: Machine Loop Invariant Code Motion -; GCN-O1-NEXT: SI Fix VGPR copies ; GCN-O1-NEXT: SI optimize exec mask operations ; GCN-O1-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O1-NEXT: Fixup Statepoint Caller Saved ; GCN-O1-NEXT: PostRA Machine Sink -; GCN-O1-NEXT: MachineDominator Tree Construction -; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: MachinePostDominator Tree Construction ; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Optimization Remark Emitter @@ -505,6 +507,7 @@ ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: TLS Variable Hoist ; GCN-O1-OPTS-NEXT: Early CSE +; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-OPTS-NEXT: AMDGPU Attributor ; GCN-O1-OPTS-NEXT: CallGraph Construction ; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager @@ -653,18 +656,17 @@ ; GCN-O1-OPTS-NEXT: Live Register Matrix ; GCN-O1-OPTS-NEXT: Greedy Register Allocator ; GCN-O1-OPTS-NEXT: GCN NSA Reassign +; GCN-O1-OPTS-NEXT: SI Simplify Predicated Copies ; GCN-O1-OPTS-NEXT: Virtual Register Rewriter ; GCN-O1-OPTS-NEXT: Stack Slot Coloring ; GCN-O1-OPTS-NEXT: Machine Copy Propagation Pass ; GCN-O1-OPTS-NEXT: Machine Loop Invariant Code Motion -; GCN-O1-OPTS-NEXT: SI Fix VGPR copies ; GCN-O1-OPTS-NEXT: SI 
optimize exec mask operations ; GCN-O1-OPTS-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O1-OPTS-NEXT: Fixup Statepoint Caller Saved ; GCN-O1-OPTS-NEXT: PostRA Machine Sink -; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction -; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction ; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter @@ -801,6 +803,7 @@ ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: TLS Variable Hoist ; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O2-NEXT: AMDGPU Attributor ; GCN-O2-NEXT: CallGraph Construction ; GCN-O2-NEXT: Call Graph SCC Pass Manager @@ -951,18 +954,17 @@ ; GCN-O2-NEXT: Live Register Matrix ; GCN-O2-NEXT: Greedy Register Allocator ; GCN-O2-NEXT: GCN NSA Reassign +; GCN-O2-NEXT: SI Simplify Predicated Copies ; GCN-O2-NEXT: Virtual Register Rewriter ; GCN-O2-NEXT: Stack Slot Coloring ; GCN-O2-NEXT: Machine Copy Propagation Pass ; GCN-O2-NEXT: Machine Loop Invariant Code Motion -; GCN-O2-NEXT: SI Fix VGPR copies ; GCN-O2-NEXT: SI optimize exec mask operations ; GCN-O2-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O2-NEXT: Fixup Statepoint Caller Saved ; GCN-O2-NEXT: PostRA Machine Sink -; GCN-O2-NEXT: MachineDominator Tree Construction -; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: MachineDominator Tree Construction ; GCN-O2-NEXT: MachinePostDominator Tree Construction ; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Optimization Remark Emitter @@ -1112,6 +1114,7 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O3-NEXT: AMDGPU Attributor ; GCN-O3-NEXT: CallGraph Construction ; GCN-O3-NEXT: Call Graph SCC Pass Manager @@ -1262,18 +1265,17 @@ ; GCN-O3-NEXT: Live Register Matrix ; GCN-O3-NEXT: Greedy Register Allocator ; GCN-O3-NEXT: GCN NSA Reassign +; GCN-O3-NEXT: SI Simplify Predicated Copies ; GCN-O3-NEXT: Virtual Register Rewriter ; GCN-O3-NEXT: Stack Slot Coloring ; GCN-O3-NEXT: Machine Copy Propagation Pass ; GCN-O3-NEXT: Machine Loop Invariant Code Motion -; GCN-O3-NEXT: SI Fix VGPR copies ; GCN-O3-NEXT: SI optimize exec mask operations ; GCN-O3-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O3-NEXT: Fixup Statepoint Caller Saved ; GCN-O3-NEXT: PostRA Machine Sink -; GCN-O3-NEXT: MachineDominator Tree Construction -; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: MachineDominator Tree Construction ; GCN-O3-NEXT: MachinePostDominator Tree Construction ; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll index 5279f8e997198..534eee1143978 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 
@llvm.amdgcn.dispatch.id() #1 @@ -17,3 +17,6 @@ define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 { attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll index 42826b7466f97..29f0b1d71198e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target @@ -33,3 +33,6 @@ define amdgpu_kernel void @test2(i32 addrspace(1)* %out) { declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll index 6248bef24b3a8..7962bf67b8ef2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll @@ -16,108 +16,105 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(<32 x float> addrspace(3)* no ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0 -; GCN-NEXT: v_mov_b32_e32 v2, 1.0 ; GCN-NEXT: v_mov_b32_e32 v3, 2.0 ; GCN-NEXT: ; iglp_opt mask(0x00000000) ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_add_u32_e32 v1, s0, v0 -; GCN-NEXT: ds_read_b128 a[28:31], v1 offset:112 -; GCN-NEXT: ds_read_b128 a[24:27], v1 offset:96 -; GCN-NEXT: ds_read_b128 a[20:23], v1 offset:80 -; GCN-NEXT: ds_read_b128 a[16:19], v1 offset:64 -; GCN-NEXT: ds_read_b128 a[0:3], v1 -; GCN-NEXT: ds_read_b128 a[4:7], v1 offset:16 -; GCN-NEXT: ds_read_b128 a[8:11], v1 offset:32 -; GCN-NEXT: ds_read_b128 a[12:15], v1 offset:48 -; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, 0x6000, v1 +; GCN-NEXT: ds_read_b128 a[28:31], v2 offset:57456 +; GCN-NEXT: ds_read_b128 a[24:27], v2 offset:57440 +; GCN-NEXT: ds_read_b128 a[20:23], v2 offset:57424 +; GCN-NEXT: ds_read_b128 a[16:19], v2 offset:57408 +; GCN-NEXT: ds_read_b128 a[0:3], v2 offset:57344 +; GCN-NEXT: ds_read_b128 a[4:7], v2 offset:57360 +; GCN-NEXT: ds_read_b128 a[8:11], v2 offset:57376 +; GCN-NEXT: ds_read_b128 a[12:15], v2 offset:57392 +; GCN-NEXT: v_mov_b32_e32 v2, 1.0 +; GCN-NEXT: ds_read_b128 a[60:63], v1 offset:49264 +; GCN-NEXT: ds_read_b128 a[56:59], v1 offset:49248 +; GCN-NEXT: ds_read_b128 a[52:55], v1 offset:49232 +; GCN-NEXT: ds_read_b128 a[48:51], v1 offset:49216 +; GCN-NEXT: ds_read_b128 a[44:47], v1 offset:49200 +; GCN-NEXT: ds_read_b128 a[40:43], v1 offset:49184 +; GCN-NEXT: ds_read_b128 a[36:39], v1 offset:49168 +; GCN-NEXT: ds_read_b128 a[32:35], v1 offset:49152 +; GCN-NEXT: s_waitcnt lgkmcnt(8) ; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v3, a[0:31] +; GCN-NEXT: ds_read_b128 a[156:159], v1 offset:112 +; GCN-NEXT: ds_read_b128 a[152:155], v1 offset:96 +; GCN-NEXT: ds_read_b128 a[68:71], v1 offset:24592 +; GCN-NEXT: ds_read_b128 a[64:67], v1 offset:24576 ; 
GCN-NEXT: v_add_u32_e32 v0, s1, v0 -; GCN-NEXT: ds_read_b128 a[44:47], v1 offset:8240 -; GCN-NEXT: ds_read_b128 a[40:43], v1 offset:8224 -; GCN-NEXT: ds_read_b128 a[60:63], v1 offset:8304 -; GCN-NEXT: ds_read_b128 a[36:39], v1 offset:8208 -; GCN-NEXT: ds_read_b128 a[32:35], v1 offset:8192 -; GCN-NEXT: ds_read_b128 a[56:59], v1 offset:8288 -; GCN-NEXT: v_add_u32_e32 v4, 0x6000, v1 -; GCN-NEXT: ds_read_b128 a[116:119], v1 offset:24688 -; GCN-NEXT: ds_read_b128 a[112:115], v1 offset:24672 -; GCN-NEXT: ds_read_b128 a[108:111], v1 offset:24656 -; GCN-NEXT: ds_read_b128 a[104:107], v1 offset:24640 -; GCN-NEXT: ds_read_b128 a[100:103], v1 offset:24624 -; GCN-NEXT: ds_read_b128 a[96:99], v1 offset:24608 -; GCN-NEXT: ds_read_b128 a[92:95], v1 offset:24592 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:112 -; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:96 -; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:80 -; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:64 -; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:48 -; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32 -; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:16 -; GCN-NEXT: ds_read_b128 a[52:55], v1 offset:8272 -; GCN-NEXT: ds_write_b128 v0, a[0:3] -; GCN-NEXT: ds_read_b128 a[48:51], v1 offset:8256 -; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_waitcnt lgkmcnt(4) ; GCN-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v2, v3, a[32:63] -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: ds_read_b128 a[88:91], v1 offset:24576 -; GCN-NEXT: ds_read_b128 a[84:87], v1 offset:49264 -; GCN-NEXT: ds_read_b128 a[80:83], v1 offset:49248 -; GCN-NEXT: ds_read_b128 a[76:79], v1 offset:49232 -; GCN-NEXT: ds_read_b128 a[72:75], v1 offset:49216 -; GCN-NEXT: ds_read_b128 a[68:71], v1 offset:49200 -; GCN-NEXT: ds_read_b128 a[64:67], v1 offset:49184 -; GCN-NEXT: ds_read_b128 a[28:31], v4 offset:57456 -; GCN-NEXT: ds_read_b128 a[24:27], v4 offset:57440 -; GCN-NEXT: ds_read_b128 a[20:23], v4 offset:57424 -; GCN-NEXT: ds_read_b128 a[16:19], v4 offset:57408 -; GCN-NEXT: ds_read_b128 a[0:3], v4 offset:57344 -; GCN-NEXT: ds_read_b128 a[4:7], v4 offset:57360 -; GCN-NEXT: ds_read_b128 a[8:11], v4 offset:57376 -; GCN-NEXT: s_nop 3 -; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:8288 -; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:8304 -; GCN-NEXT: ds_read_b128 a[60:63], v1 offset:49168 -; GCN-NEXT: ds_read_b128 a[56:59], v1 offset:49152 -; GCN-NEXT: ds_read_b128 a[12:15], v4 offset:57392 +; GCN-NEXT: ds_read_b128 a[148:151], v1 offset:80 +; GCN-NEXT: ds_read_b128 a[144:147], v1 offset:64 +; GCN-NEXT: ds_read_b128 a[128:131], v1 +; GCN-NEXT: ds_read_b128 a[132:135], v1 offset:16 +; GCN-NEXT: ds_read_b128 a[136:139], v1 offset:32 +; GCN-NEXT: ds_read_b128 a[140:143], v1 offset:48 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v2, v3, a[0:31] -; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:8256 -; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:8272 -; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:8224 -; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:8240 -; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:8192 -; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:8208 -; GCN-NEXT: v_mfma_f32_32x32x1f32 a[56:87], v2, v3, a[56:87] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: v_mfma_f32_32x32x1f32 a[128:159], v2, v3, a[128:159] +; GCN-NEXT: ds_read_b128 a[124:127], v1 offset:8304 +; GCN-NEXT: ds_read_b128 a[120:123], v1 offset:8288 +; GCN-NEXT: ds_read_b128 a[116:119], v1 offset:8272 +; GCN-NEXT: ds_read_b128 a[112:115], v1 offset:8256 +; GCN-NEXT: ds_read_b128 a[108:111], 
v1 offset:8240 +; GCN-NEXT: ds_read_b128 a[104:107], v1 offset:8224 +; GCN-NEXT: ds_read_b128 a[100:103], v1 offset:8208 +; GCN-NEXT: ds_read_b128 a[96:99], v1 offset:8192 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mfma_f32_32x32x1f32 a[96:127], v2, v3, a[96:127] +; GCN-NEXT: ds_read_b128 a[92:95], v1 offset:24688 +; GCN-NEXT: ds_read_b128 a[88:91], v1 offset:24672 +; GCN-NEXT: ds_read_b128 a[84:87], v1 offset:24656 +; GCN-NEXT: ds_read_b128 a[80:83], v1 offset:24640 +; GCN-NEXT: ds_read_b128 a[76:79], v1 offset:24624 +; GCN-NEXT: ds_read_b128 a[72:75], v1 offset:24608 +; GCN-NEXT: s_nop 2 +; GCN-NEXT: ds_write_b128 v0, a[156:159] offset:112 +; GCN-NEXT: ds_write_b128 v0, a[152:155] offset:96 +; GCN-NEXT: ds_write_b128 v0, a[148:151] offset:80 +; GCN-NEXT: ds_write_b128 v0, a[144:147] offset:64 +; GCN-NEXT: ds_write_b128 v0, a[140:143] offset:48 +; GCN-NEXT: ds_write_b128 v0, a[136:139] offset:32 +; GCN-NEXT: ds_write_b128 v0, a[132:135] offset:16 +; GCN-NEXT: ds_write_b128 v0, a[128:131] +; GCN-NEXT: v_mov_b32_e32 v0, s1 +; GCN-NEXT: s_waitcnt lgkmcnt(8) +; GCN-NEXT: v_mfma_f32_32x32x1f32 a[64:95], v2, v3, a[64:95] +; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:24672 +; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:24688 +; GCN-NEXT: ds_write_b128 v0, a[48:51] offset:24640 +; GCN-NEXT: ds_write_b128 v0, a[120:123] offset:8288 +; GCN-NEXT: ds_write_b128 v0, a[124:127] offset:8304 +; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:8256 +; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:8272 +; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:8224 +; GCN-NEXT: ds_write_b128 v0, a[108:111] offset:8240 +; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:8192 +; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:8208 +; GCN-NEXT: ds_write_b128 v0, a[52:55] offset:24656 +; GCN-NEXT: ds_write_b128 v0, a[40:43] offset:24608 +; GCN-NEXT: ds_write_b128 v0, a[44:47] offset:24624 +; GCN-NEXT: ds_write_b128 v0, a[32:35] offset:24576 +; GCN-NEXT: ds_write_b128 v0, a[36:39] offset:24592 ; GCN-NEXT: ds_write_b128 v0, a[24:27] offset:32864 ; GCN-NEXT: ds_write_b128 v0, a[28:31] offset:32880 ; GCN-NEXT: ds_write_b128 v0, a[16:19] offset:32832 +; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:16480 +; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:16496 +; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:16448 +; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:16464 +; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:16416 +; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:16432 +; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:16384 +; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:16400 ; GCN-NEXT: ds_write_b128 v0, a[20:23] offset:32848 ; GCN-NEXT: ds_write_b128 v0, a[8:11] offset:32800 ; GCN-NEXT: ds_write_b128 v0, a[12:15] offset:32816 ; GCN-NEXT: ds_write_b128 v0, a[0:3] offset:32768 ; GCN-NEXT: ds_write_b128 v0, a[4:7] offset:32784 -; GCN-NEXT: v_mfma_f32_32x32x1f32 a[88:119], v2, v3, a[88:119] -; GCN-NEXT: ds_write_b128 v0, a[80:83] offset:24672 -; GCN-NEXT: ds_write_b128 v0, a[84:87] offset:24688 -; GCN-NEXT: ds_write_b128 v0, a[72:75] offset:24640 -; GCN-NEXT: ds_write_b128 v0, a[76:79] offset:24656 -; GCN-NEXT: ds_write_b128 v0, a[64:67] offset:24608 -; GCN-NEXT: ds_write_b128 v0, a[68:71] offset:24624 -; GCN-NEXT: ds_write_b128 v0, a[56:59] offset:24576 -; GCN-NEXT: ds_write_b128 v0, a[60:63] offset:24592 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 -; GCN-NEXT: ds_write_b128 v0, a[112:115] offset:16480 -; GCN-NEXT: ds_write_b128 v0, a[116:119] offset:16496 -; GCN-NEXT: ds_write_b128 v0, a[104:107] offset:16448 -; GCN-NEXT: ds_write_b128 v0, 
a[108:111] offset:16464 -; GCN-NEXT: ds_write_b128 v0, a[96:99] offset:16416 -; GCN-NEXT: ds_write_b128 v0, a[100:103] offset:16432 -; GCN-NEXT: ds_write_b128 v0, a[88:91] offset:16384 -; GCN-NEXT: ds_write_b128 v0, a[92:95] offset:16400 ; GCN-NEXT: s_endpgm entry: call void @llvm.amdgcn.iglp.opt(i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll index 92261d700446b..000ef46fb8712 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,COV5 %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,COV5 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty: ; HSA: enable_sgpr_kernarg_segment_ptr = 1 @@ -403,3 +403,6 @@ attributes #0 = { nounwind noinline } attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll index f5c137a056b73..4a907b34ddd29 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}is_private_vgpr: ; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] @@ -47,3 +47,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i1 @llvm.amdgcn.is.private(i8* nocapture) #0 attributes #0 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll index f98676b96439d..b5f4d245c625d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}is_local_vgpr: ; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] @@ -48,3 +48,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #0 attributes #0 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index 3ae0f77881d89..f1aa6868a5e5f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s @@ -123,3 +123,6 @@ attributes #0 = { nounwind readnone } attributes #1 = { nounwind } attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" } attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll index c7d1bbb3b4795..4dad5719fab11 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target @@ -17,3 +17,6 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) { declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll index c04ab319cc8c5..a8f6bf5b20c98 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll @@ -12,7 +12,6 @@ define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; 
CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -33,7 +32,6 @@ define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -58,7 +56,6 @@ define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: v_add_f32_e32 v1, v1, v2 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 idxen ; CHECK-NEXT: v_add_f32_e32 v0, v1, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ; return to shader part epilog main_body: @@ -124,7 +121,6 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -156,7 +152,6 @@ define amdgpu_ps float @test_strict_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -191,7 +186,6 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec ; CHECK-NEXT: .LBB6_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 @@ -246,7 +240,6 @@ define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec ; CHECK-NEXT: .LBB7_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: s_and_b64 exec, exec, s[14:15] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll index 6d7ce5589f96f..4eb54de147228 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2 %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -march=amdgcn 
-mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2 %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -104,3 +104,6 @@ define amdgpu_kernel void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll index f41a184f3179c..b9c9630210a72 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck --check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs | FileCheck 
--check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -132,6 +132,9 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(i32* %out) !reqd_work attributes #0 = { nounwind readnone } attributes #1 = { nounwind } +!llvm.module.flags = !{!3} + !0 = !{i32 64, i32 1, i32 1} !1 = !{i32 1, i32 64, i32 1} !2 = !{i32 1, i32 1, i32 64} +!3 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll index a3ce49234d788..7ac2b14cd5862 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll @@ -684,7 +684,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: s_and_b64 exec, exec, s[0:1] ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_or_b64 s[2:3], s[2:3], vcc @@ -732,7 +731,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], vcc @@ -777,7 +775,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX10-32-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_xor_b32 s1, s0, -1 @@ -823,7 +820,6 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) { ; GFX10-64-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf 
bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX10-64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_xor_b64 s[2:3], s[0:1], -1 @@ -921,7 +917,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; SI-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] @@ -987,7 +982,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX9-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] @@ -1050,7 +1044,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX10-32-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_or_b32 s3, s3, vcc_lo ; GFX10-32-NEXT: s_and_saveexec_b32 s4, s3 @@ -1113,7 +1106,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index ; GFX10-64-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-64-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX10-64-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index 0a62c42969bb9..ade927bdd19be 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -284,9 +284,10 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; SI-NEXT: v_mul_hi_u32 v0, s0, v0 ; SI-NEXT: v_mul_hi_u32 v2, s1, v2 ; SI-NEXT: s_mul_i32 s1, s1, s3 -; SI-NEXT: s_mul_i32 s2, s0, s2 +; SI-NEXT: s_mul_i32 s0, s0, s2 ; SI-NEXT: v_add_i32_e32 v4, vcc, s5, v0 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; SI-NEXT: v_mov_b32_e32 v5, s0 ; SI-NEXT: v_add_i32_e32 v4, vcc, s4, v4 ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc @@ -296,10 +297,8 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; SI-NEXT: v_add_i32_e32 v2, vcc, s4, v3 ; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; SI-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc -; SI-NEXT: s_and_b64 s[0:1], vcc, exec -; SI-NEXT: s_cselect_b32 s0, 0, s2 +; SI-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -323,12 +322,13 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_addc_u32 s5, 0, s5 ; GFX9-NEXT: s_add_i32 s1, s8, s7 ; GFX9-NEXT: s_add_i32 s1, s1, s6 -; GFX9-NEXT: s_mul_i32 s0, s0, s2 ; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX9-NEXT: s_cselect_b32 s1, 0, s1 -; GFX9-NEXT: s_cselect_b32 s0, 0, s0 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: s_mul_i32 s2, s0, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm ; @@ -354,10 +354,9 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; GFX10-NEXT: s_mul_i32 s0, s0, s2 ; GFX10-NEXT: s_add_i32 s1, s1, s6 ; GFX10-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX10-NEXT: s_cselect_b32 s0, 0, s0 -; GFX10-NEXT: s_cselect_b32 s1, 0, s1 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, s2 ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm ; @@ -383,10 +382,10 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; GFX11-NEXT: s_mul_i32 s0, s0, s2 ; GFX11-NEXT: s_add_i32 s1, s1, s6 ; GFX11-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX11-NEXT: s_cselect_b32 s0, 0, s0 -; GFX11-NEXT: s_cselect_b32 s1, 0, s1 +; GFX11-NEXT: s_cselect_b32 s2, -1, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, 0, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v0, s0, 0, s2 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -414,38 +413,35 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; SI-NEXT: v_mul_hi_u32 v0, s0, v0 ; SI-NEXT: v_mul_hi_i32 v2, s1, v2 ; SI-NEXT: s_mul_i32 s6, s1, s3 -; SI-NEXT: s_mul_i32 s8, s0, s2 -; SI-NEXT: v_readfirstlane_b32 s9, v1 -; SI-NEXT: v_readfirstlane_b32 s10, v3 -; SI-NEXT: v_readfirstlane_b32 s11, v0 -; SI-NEXT: v_readfirstlane_b32 s12, v2 -; SI-NEXT: v_add_i32_e32 v0, vcc, s5, v0 -; SI-NEXT: s_add_u32 s5, s11, s5 -; SI-NEXT: v_add_i32_e32 v2, vcc, s4, v0 -; SI-NEXT: s_addc_u32 s10, 0, s10 -; SI-NEXT: v_ashrrev_i32_e32 v0, 31, v2 -; SI-NEXT: s_add_u32 s4, s5, s4 -; SI-NEXT: v_mov_b32_e32 v1, v0 -; SI-NEXT: s_addc_u32 s4, s10, s9 -; SI-NEXT: s_addc_u32 s5, s12, 0 -; SI-NEXT: s_add_u32 s4, s4, s6 -; SI-NEXT: s_addc_u32 s5, 0, s5 -; SI-NEXT: s_sub_u32 s2, s4, s2 -; SI-NEXT: s_subb_u32 s6, s5, 0 ; SI-NEXT: s_cmp_lt_i32 s1, 0 -; SI-NEXT: s_cselect_b32 s1, s6, s5 -; SI-NEXT: s_cselect_b32 s2, s2, s4 -; SI-NEXT: s_sub_u32 s0, s2, s0 -; SI-NEXT: s_subb_u32 s4, s1, 0 +; SI-NEXT: s_mul_i32 s1, s0, s2 +; SI-NEXT: v_add_i32_e32 v4, vcc, s5, v0 +; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; SI-NEXT: v_mov_b32_e32 v5, s1 +; SI-NEXT: v_add_i32_e32 v4, vcc, s4, v4 +; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc +; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; SI-NEXT: v_add_i32_e32 v0, vcc, s5, v0 +; SI-NEXT: v_add_i32_e32 v1, vcc, s6, v1 +; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; SI-NEXT: v_add_i32_e32 v4, vcc, s4, v0 +; SI-NEXT: v_subrev_i32_e32 v3, vcc, s2, v1 +; SI-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v2, vcc +; SI-NEXT: 
s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_lt_i32 s3, 0 -; SI-NEXT: s_cselect_b32 s1, s4, s1 -; SI-NEXT: s_cselect_b32 s0, s0, s2 -; SI-NEXT: v_cmp_ne_u64_e32 vcc, s[0:1], v[0:1] -; SI-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc -; SI-NEXT: s_and_b64 s[0:1], vcc, exec -; SI-NEXT: s_cselect_b32 s0, 0, s8 +; SI-NEXT: v_ashrrev_i32_e32 v0, 31, v4 +; SI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; SI-NEXT: v_cndmask_b32_e32 v6, v1, v3, vcc +; SI-NEXT: v_mov_b32_e32 v1, v0 +; SI-NEXT: v_subrev_i32_e32 v7, vcc, s0, v6 +; SI-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v2, vcc +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc +; SI-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc +; SI-NEXT: v_cmp_ne_u64_e32 vcc, v[2:3], v[0:1] +; SI-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -455,38 +451,44 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_mul_i32 s7, s0, s3 ; GFX9-NEXT: s_mul_hi_u32 s8, s0, s2 -; GFX9-NEXT: s_mul_hi_u32 s5, s0, s3 +; GFX9-NEXT: s_mul_hi_u32 s6, s0, s3 ; GFX9-NEXT: s_add_u32 s9, s8, s7 -; GFX9-NEXT: s_mul_i32 s6, s1, s2 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_mul_i32 s5, s1, s2 +; GFX9-NEXT: s_addc_u32 s6, 0, s6 ; GFX9-NEXT: s_mul_hi_u32 s4, s1, s2 -; GFX9-NEXT: s_add_u32 s9, s9, s6 +; GFX9-NEXT: s_add_u32 s9, s9, s5 ; GFX9-NEXT: s_mul_hi_i32 s10, s1, s3 -; GFX9-NEXT: s_addc_u32 s4, s5, s4 -; GFX9-NEXT: s_addc_u32 s5, s10, 0 +; GFX9-NEXT: s_addc_u32 s4, s6, s4 +; GFX9-NEXT: s_addc_u32 s6, s10, 0 ; GFX9-NEXT: s_mul_i32 s9, s1, s3 ; GFX9-NEXT: s_add_u32 s4, s4, s9 -; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_addc_u32 s6, 0, s6 ; GFX9-NEXT: s_sub_u32 s9, s4, s2 -; GFX9-NEXT: s_subb_u32 s10, s5, 0 +; GFX9-NEXT: s_subb_u32 s10, s6, 0 ; GFX9-NEXT: s_cmp_lt_i32 s1, 0 -; GFX9-NEXT: s_cselect_b32 s4, s9, s4 -; GFX9-NEXT: s_cselect_b32 s1, s10, s5 -; GFX9-NEXT: s_sub_u32 s9, s4, s0 -; GFX9-NEXT: s_subb_u32 s5, s1, 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc +; GFX9-NEXT: v_subrev_co_u32_e32 v3, vcc, s0, v2 +; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v0, vcc ; GFX9-NEXT: s_cmp_lt_i32 s3, 0 -; GFX9-NEXT: s_cselect_b32 s5, s5, s1 -; GFX9-NEXT: s_cselect_b32 s4, s9, s4 +; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX9-NEXT: s_add_i32 s1, s8, s7 -; GFX9-NEXT: s_add_i32 s1, s1, s6 -; GFX9-NEXT: s_ashr_i32 s6, s1, 31 -; GFX9-NEXT: s_mov_b32 s7, s6 +; GFX9-NEXT: s_add_i32 s1, s1, s5 +; GFX9-NEXT: s_ashr_i32 s4, s1, 31 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s5, s4 ; GFX9-NEXT: s_mul_i32 s0, s0, s2 -; GFX9-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] -; GFX9-NEXT: s_cselect_b32 s1, 0, s1 -; GFX9-NEXT: s_cselect_b32 s0, 0, s0 +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, s[4:5], v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v2, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_endpgm ; @@ -496,38 +498,40 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX10-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_mul_i32 s7, s0, s3 ; GFX10-NEXT: s_mul_hi_u32 s8, s0, s2 -; GFX10-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX10-NEXT: s_mul_i32 s6, s1, s2 +; GFX10-NEXT: s_mul_hi_u32 s6, s0, s3 +; GFX10-NEXT: s_mul_i32 s5, s1, s2 ; GFX10-NEXT: s_add_u32 s11, s8, s7 ; GFX10-NEXT: s_mul_hi_u32 s4, s1, s2 -; GFX10-NEXT: s_addc_u32 s5, 0, s5 +; GFX10-NEXT: s_addc_u32 s6, 0, s6 ; GFX10-NEXT: s_mul_hi_i32 s9, s1, s3 -; GFX10-NEXT: s_add_u32 s11, s11, s6 +; GFX10-NEXT: s_add_u32 s11, s11, s5 ; GFX10-NEXT: s_mul_i32 s10, s1, s3 -; GFX10-NEXT: s_addc_u32 s4, s5, s4 -; GFX10-NEXT: s_addc_u32 s5, s9, 0 +; GFX10-NEXT: s_addc_u32 s4, s6, s4 +; GFX10-NEXT: s_addc_u32 s6, s9, 0 ; GFX10-NEXT: s_add_u32 s4, s4, s10 -; GFX10-NEXT: s_addc_u32 s5, 0, s5 +; GFX10-NEXT: s_addc_u32 s6, 0, s6 ; GFX10-NEXT: s_sub_u32 s9, s4, s2 -; GFX10-NEXT: s_subb_u32 s10, s5, 0 +; GFX10-NEXT: s_subb_u32 s10, s6, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: s_cmp_lt_i32 s1, 0 -; GFX10-NEXT: s_cselect_b32 s1, s9, s4 -; GFX10-NEXT: s_cselect_b32 s4, s10, s5 -; GFX10-NEXT: s_sub_u32 s9, s1, s0 -; GFX10-NEXT: s_subb_u32 s5, s4, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s10 +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX10-NEXT: s_cmp_lt_i32 s3, 0 -; GFX10-NEXT: s_mul_i32 s0, s0, s2 -; GFX10-NEXT: s_cselect_b32 s5, s5, s4 -; GFX10-NEXT: s_cselect_b32 s4, s9, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, s6, v0, vcc_lo +; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v2, s0 +; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v0, vcc_lo +; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX10-NEXT: s_add_i32 s1, s8, s7 -; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_mov_b32 s7, s6 -; GFX10-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] -; GFX10-NEXT: s_cselect_b32 s0, 0, s0 -; GFX10-NEXT: s_cselect_b32 s1, 0, s1 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: s_mul_i32 s0, s0, s2 +; GFX10-NEXT: s_add_i32 s1, s1, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo +; GFX10-NEXT: s_ashr_i32 s4, s1, 31 +; GFX10-NEXT: s_mov_b32 s5, s4 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX10-NEXT: v_cndmask_b32_e64 v1, s1, 0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, s0, 0, vcc_lo ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm ; @@ -537,40 +541,42 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mul_i32 s7, s0, s3 ; GFX11-NEXT: s_mul_hi_u32 s8, s0, s2 -; GFX11-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX11-NEXT: s_mul_i32 s6, s1, s2 +; GFX11-NEXT: s_mul_hi_u32 s6, s0, s3 +; GFX11-NEXT: s_mul_i32 s5, s1, s2 ; GFX11-NEXT: s_add_u32 s11, s8, s7 ; GFX11-NEXT: s_mul_hi_u32 s4, s1, s2 -; GFX11-NEXT: s_addc_u32 s5, 0, s5 +; GFX11-NEXT: s_addc_u32 s6, 0, s6 ; GFX11-NEXT: s_mul_hi_i32 s9, s1, s3 -; GFX11-NEXT: s_add_u32 s11, s11, s6 +; GFX11-NEXT: s_add_u32 s11, s11, s5 ; GFX11-NEXT: s_mul_i32 s10, s1, s3 -; GFX11-NEXT: s_addc_u32 s4, s5, s4 -; GFX11-NEXT: s_addc_u32 s5, s9, 0 +; GFX11-NEXT: s_addc_u32 s4, s6, s4 +; GFX11-NEXT: s_addc_u32 s6, s9, 0 ; GFX11-NEXT: s_add_u32 s4, s4, s10 -; GFX11-NEXT: s_addc_u32 s5, 0, s5 +; GFX11-NEXT: s_addc_u32 s6, 0, s6 ; GFX11-NEXT: s_sub_u32 s9, s4, s2 -; GFX11-NEXT: s_subb_u32 s10, s5, 0 +; GFX11-NEXT: s_subb_u32 s10, s6, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_dual_mov_b32 v1, s9 :: 
v_dual_mov_b32 v0, s10 ; GFX11-NEXT: s_cmp_lt_i32 s1, 0 -; GFX11-NEXT: s_cselect_b32 s1, s9, s4 -; GFX11-NEXT: s_cselect_b32 s4, s10, s5 -; GFX11-NEXT: s_sub_u32 s9, s1, s0 -; GFX11-NEXT: s_subb_u32 s5, s4, 0 +; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX11-NEXT: s_cmp_lt_i32 s3, 0 -; GFX11-NEXT: s_mul_i32 s0, s0, s2 -; GFX11-NEXT: s_cselect_b32 s5, s5, s4 -; GFX11-NEXT: s_cselect_b32 s4, s9, s1 +; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, s6, v0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_co_u32 v3, vcc_lo, v2, s0 +; GFX11-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v0, vcc_lo +; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX11-NEXT: s_add_i32 s1, s8, s7 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s1, s1, s6 -; GFX11-NEXT: s_ashr_i32 s6, s1, 31 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_mov_b32 s7, s6 -; GFX11-NEXT: s_cmp_lg_u64 s[4:5], s[6:7] -; GFX11-NEXT: s_cselect_b32 s0, 0, s0 -; GFX11-NEXT: s_cselect_b32 s1, 0, s1 +; GFX11-NEXT: s_mul_i32 s0, s0, s2 +; GFX11-NEXT: s_add_i32 s1, s1, s5 +; GFX11-NEXT: v_dual_cndmask_b32 v1, v0, v1 :: v_dual_cndmask_b32 v0, v2, v3 +; GFX11-NEXT: s_ashr_i32 s4, s1, 31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_mov_b32 s5, s4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, s0, 0, vcc_lo ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll index 8b21896d64603..995379cdf5b13 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -5,37 +5,42 @@ define amdgpu_kernel void @round_f64(double addrspace(1)* %out, double %x) #0 { ; SI-LABEL: round_f64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s9, 0xfffff -; SI-NEXT: s_mov_b32 s8, s6 -; SI-NEXT: v_mov_b32_e32 v2, 0x3ff00000 +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_bfe_u32 s0, s3, 0xb0014 -; SI-NEXT: s_addk_i32 s0, 0xfc01 -; SI-NEXT: s_lshr_b64 s[8:9], s[8:9], s0 -; SI-NEXT: s_andn2_b64 s[8:9], s[2:3], s[8:9] -; SI-NEXT: s_and_b32 s5, s3, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s0, 0 -; SI-NEXT: s_cselect_b32 s8, 0, s8 -; SI-NEXT: s_cselect_b32 s5, s5, s9 -; SI-NEXT: s_cmp_gt_i32 s0, 51 -; SI-NEXT: s_cselect_b32 s8, s2, s8 -; SI-NEXT: s_cselect_b32 s9, s3, s5 -; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_add_i32 s4, s0, 0xfffffc01 +; SI-NEXT: s_mov_b32 s0, s10 +; SI-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 +; SI-NEXT: s_andn2_b64 s[2:3], s[6:7], s[0:1] +; SI-NEXT: s_and_b32 s0, s7, 0x80000000 +; SI-NEXT: s_cmp_lt_i32 s4, 0 +; SI-NEXT: v_mov_b32_e32 v0, s3 +; SI-NEXT: v_mov_b32_e32 
v1, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s4, 51 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v2, s6 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_add_f64 v[2:3], s[6:7], -v[0:1] ; SI-NEXT: s_brev_b32 s0, -2 -; SI-NEXT: v_mov_b32_e32 v3, s3 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: v_bfi_b32 v2, s0, v2, v3 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: v_add_f64 v[0:1], s[8:9], v[0:1] -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: v_mov_b32_e32 v5, s7 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; SI-NEXT: v_bfi_b32 v4, s0, v4, v5 +; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0 +; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: round_f64: @@ -139,56 +144,66 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa define amdgpu_kernel void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 { ; SI-LABEL: round_v2f64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s9, 0xfffff -; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s3, 0xfffff +; SI-NEXT: s_mov_b32 s2, s6 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s3, s7, 0xb0014 -; SI-NEXT: s_addk_i32 s3, 0xfc01 -; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s3 -; SI-NEXT: s_andn2_b64 s[10:11], s[6:7], s[10:11] -; SI-NEXT: s_and_b32 s12, s7, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s3, 0 -; SI-NEXT: s_cselect_b32 s10, 0, s10 -; SI-NEXT: s_cselect_b32 s11, s12, s11 -; SI-NEXT: s_cmp_gt_i32 s3, 51 -; SI-NEXT: s_cselect_b32 s10, s6, s10 -; SI-NEXT: s_cselect_b32 s11, s7, s11 -; SI-NEXT: v_mov_b32_e32 v0, s10 +; SI-NEXT: s_bfe_u32 s0, s11, 0xb0014 +; SI-NEXT: s_add_i32 s7, s0, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s7 +; SI-NEXT: s_andn2_b64 s[12:13], s[10:11], s[0:1] +; SI-NEXT: s_and_b32 s0, s11, 0x80000000 +; SI-NEXT: s_cmp_lt_i32 s7, 0 +; SI-NEXT: v_mov_b32_e32 v0, s13 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s7, 51 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-NEXT: v_mov_b32_e32 v1, s11 -; SI-NEXT: v_add_f64 v[0:1], s[6:7], -v[0:1] -; SI-NEXT: s_brev_b32 s3, -2 -; SI-NEXT: v_mov_b32_e32 v4, 0x3ff00000 -; SI-NEXT: v_mov_b32_e32 v2, s7 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: v_bfi_b32 v2, s3, v4, v2 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014 -; SI-NEXT: v_add_f64 v[2:3], s[10:11], v[0:1] -; SI-NEXT: s_add_i32 s10, s6, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[6:7], s[8:9], s10 -; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7] -; SI-NEXT: s_and_b32 s8, s5, 0x80000000 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s12 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v2, s10 +; 
SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s9, 0xb0014 +; SI-NEXT: s_add_i32 s10, s0, 0xfffffc01 +; SI-NEXT: s_brev_b32 s7, -2 +; SI-NEXT: v_mov_b32_e32 v6, 0x3ff00000 +; SI-NEXT: v_mov_b32_e32 v4, s11 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s10 +; SI-NEXT: v_bfi_b32 v4, s7, v6, v4 +; SI-NEXT: s_andn2_b64 s[2:3], s[8:9], s[0:1] +; SI-NEXT: s_and_b32 s0, s9, 0x80000000 +; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0 ; SI-NEXT: s_cmp_lt_i32 s10, 0 -; SI-NEXT: s_cselect_b32 s6, 0, s6 -; SI-NEXT: s_cselect_b32 s7, s8, s7 +; SI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; SI-NEXT: v_mov_b32_e32 v0, s3 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_gt_i32 s10, 51 -; SI-NEXT: s_cselect_b32 s6, s4, s6 -; SI-NEXT: s_cselect_b32 s7, s5, s7 -; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: v_add_f64 v[0:1], s[4:5], -v[0:1] -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: v_bfi_b32 v4, s3, v4, v5 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: v_add_f64 v[0:1], s[6:7], v[0:1] -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v4, s8 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; SI-NEXT: v_add_f64 v[4:5], s[8:9], -v[0:1] +; SI-NEXT: v_mov_b32_e32 v7, s9 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 +; SI-NEXT: v_bfi_b32 v6, s7, v6, v7 +; SI-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc +; SI-NEXT: v_mov_b32_e32 v4, 0 +; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: round_v2f64: @@ -227,96 +242,116 @@ define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x dou ; SI-LABEL: round_v4f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s13, 0xfffff -; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xfffff +; SI-NEXT: s_mov_b32 s2, s14 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s12, s7, 0xb0014 -; SI-NEXT: s_add_i32 s16, s12, 0xfffffc01 -; SI-NEXT: s_mov_b32 s12, s2 -; SI-NEXT: s_lshr_b64 s[14:15], s[12:13], s16 -; SI-NEXT: s_andn2_b64 s[14:15], s[6:7], s[14:15] -; SI-NEXT: s_and_b32 s17, s7, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s16, 0 -; SI-NEXT: s_cselect_b32 s14, 0, s14 -; SI-NEXT: s_cselect_b32 s15, s17, s15 -; SI-NEXT: s_cmp_gt_i32 s16, 51 -; SI-NEXT: s_cselect_b32 s14, s6, s14 -; SI-NEXT: s_cselect_b32 s15, s7, s15 -; SI-NEXT: v_mov_b32_e32 v0, s14 -; SI-NEXT: v_mov_b32_e32 v1, s15 -; SI-NEXT: v_add_f64 v[0:1], s[6:7], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014 +; SI-NEXT: s_add_i32 s18, s0, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s18 +; SI-NEXT: s_andn2_b64 s[16:17], s[6:7], s[0:1] +; SI-NEXT: s_and_b32 s0, s7, 0x80000000 +; SI-NEXT: s_cmp_lt_i32 s18, 0 +; SI-NEXT: v_mov_b32_e32 v0, s17 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: 
s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s18, 51 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s16 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v2, s6 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_add_f64 v[2:3], s[6:7], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s5, 0xb0014 +; SI-NEXT: s_add_i32 s17, s0, 0xfffffc01 ; SI-NEXT: s_brev_b32 s16, -2 -; SI-NEXT: v_mov_b32_e32 v2, s7 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: v_bfi_b32 v2, s16, v8, v2 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014 -; SI-NEXT: v_add_f64 v[2:3], s[14:15], v[0:1] -; SI-NEXT: s_add_i32 s14, s6, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[6:7], s[12:13], s14 -; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7] -; SI-NEXT: s_and_b32 s15, s5, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s14, 0 -; SI-NEXT: s_cselect_b32 s6, 0, s6 -; SI-NEXT: s_cselect_b32 s7, s15, s7 -; SI-NEXT: s_cmp_gt_i32 s14, 51 -; SI-NEXT: s_cselect_b32 s6, s4, s6 -; SI-NEXT: s_cselect_b32 s7, s5, s7 +; SI-NEXT: v_mov_b32_e32 v12, 0x3ff00000 +; SI-NEXT: v_mov_b32_e32 v4, s7 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s17 +; SI-NEXT: v_bfi_b32 v4, s16, v12, v4 +; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[0:1] +; SI-NEXT: s_and_b32 s0, s5, 0x80000000 +; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0 +; SI-NEXT: s_cmp_lt_i32 s17, 0 +; SI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; SI-NEXT: v_mov_b32_e32 v0, s7 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s17, 51 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] ; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: v_add_f64 v[0:1], s[4:5], -v[0:1] -; SI-NEXT: s_bfe_u32 s4, s11, 0xb0014 -; SI-NEXT: s_add_i32 s14, s4, 0xfffffc01 -; SI-NEXT: v_mov_b32_e32 v4, s5 -; SI-NEXT: s_lshr_b64 s[4:5], s[12:13], s14 -; SI-NEXT: s_andn2_b64 s[4:5], s[10:11], s[4:5] -; SI-NEXT: s_and_b32 s15, s11, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s14, 0 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s15, s5 -; SI-NEXT: s_cmp_gt_i32 s14, 51 -; SI-NEXT: v_bfi_b32 v4, s16, v8, v4 -; SI-NEXT: s_cselect_b32 s4, s10, s4 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-NEXT: s_cselect_b32 s5, s11, s5 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; SI-NEXT: v_mov_b32_e32 v4, s4 -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: v_add_f64 v[4:5], s[10:11], -v[4:5] -; SI-NEXT: v_mov_b32_e32 v6, s11 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; SI-NEXT: s_bfe_u32 s0, s11, 0xb0014 +; SI-NEXT: v_add_f64 v[4:5], s[4:5], -v[0:1] +; SI-NEXT: s_add_i32 s6, s0, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s6 +; SI-NEXT: v_mov_b32_e32 v6, s5 ; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; SI-NEXT: v_bfi_b32 v6, s16, v8, v6 -; SI-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc -; SI-NEXT: v_mov_b32_e32 v4, 0 -; SI-NEXT: v_add_f64 v[6:7], s[4:5], v[4:5] -; SI-NEXT: s_bfe_u32 s4, s9, 0xb0014 -; SI-NEXT: s_add_i32 s10, s4, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[4:5], s[12:13], s10 -; SI-NEXT: s_andn2_b64 s[4:5], s[8:9], s[4:5] -; SI-NEXT: s_and_b32 s11, s9, 0x80000000 -; SI-NEXT: 
s_cmp_lt_i32 s10, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s11, s5 -; SI-NEXT: s_cmp_gt_i32 s10, 51 -; SI-NEXT: s_cselect_b32 s4, s8, s4 -; SI-NEXT: s_cselect_b32 s5, s9, s5 +; SI-NEXT: s_andn2_b64 s[4:5], s[10:11], s[0:1] +; SI-NEXT: s_and_b32 s0, s11, 0x80000000 +; SI-NEXT: v_bfi_b32 v6, s16, v12, v6 +; SI-NEXT: s_cmp_lt_i32 s6, 0 +; SI-NEXT: v_cndmask_b32_e32 v9, 0, v6, vcc +; SI-NEXT: v_mov_b32_e32 v4, s5 +; SI-NEXT: v_mov_b32_e32 v5, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s6, 51 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s11 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] ; SI-NEXT: v_mov_b32_e32 v4, s4 -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: v_add_f64 v[4:5], s[8:9], -v[4:5] -; SI-NEXT: v_mov_b32_e32 v9, s9 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; SI-NEXT: v_bfi_b32 v8, s16, v8, v9 -; SI-NEXT: v_cndmask_b32_e32 v5, 0, v8, vcc -; SI-NEXT: v_mov_b32_e32 v4, 0 -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: v_add_f64 v[4:5], s[4:5], v[4:5] -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: v_add_f64 v[0:1], s[6:7], v[0:1] -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v6, s10 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] +; SI-NEXT: v_add_f64 v[6:7], s[10:11], -v[4:5] +; SI-NEXT: s_bfe_u32 s0, s9, 0xb0014 +; SI-NEXT: s_add_i32 s4, s0, 0xfffffc01 +; SI-NEXT: v_mov_b32_e32 v10, s11 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s4 +; SI-NEXT: v_bfi_b32 v10, s16, v12, v10 +; SI-NEXT: s_andn2_b64 s[2:3], s[8:9], s[0:1] +; SI-NEXT: s_and_b32 s0, s9, 0x80000000 +; SI-NEXT: v_cndmask_b32_e32 v7, 0, v10, vcc +; SI-NEXT: v_mov_b32_e32 v6, 0 +; SI-NEXT: s_cmp_lt_i32 s4, 0 +; SI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] +; SI-NEXT: v_mov_b32_e32 v4, s3 +; SI-NEXT: v_mov_b32_e32 v5, s0 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s4, 51 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] +; SI-NEXT: v_mov_b32_e32 v4, s2 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v10, s8 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1] +; SI-NEXT: v_add_f64 v[10:11], s[8:9], -v[4:5] +; SI-NEXT: v_mov_b32_e32 v13, s9 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[10:11]|, 0.5 +; SI-NEXT: v_bfi_b32 v12, s16, v12, v13 +; SI-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc +; SI-NEXT: v_mov_b32_e32 v10, 0 +; SI-NEXT: v_mov_b32_e32 v8, 0 +; SI-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11] +; SI-NEXT: s_mov_b32 s15, 0xf000 +; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[8:9] +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: round_v4f64: @@ -372,178 +407,219 @@ define amdgpu_kernel void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x dou ; SI-LABEL: round_v8f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s22, -1 ; SI-NEXT: s_mov_b32 s21, 0xfffff +; SI-NEXT: s_mov_b32 s20, s22 ; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s20, s7, 0xb0014 -; SI-NEXT: s_add_i32 s24, s20, 0xfffffc01 -; SI-NEXT: s_mov_b32 s20, 
s2 -; SI-NEXT: s_lshr_b64 s[22:23], s[20:21], s24 -; SI-NEXT: s_andn2_b64 s[22:23], s[6:7], s[22:23] -; SI-NEXT: s_and_b32 s25, s7, 0x80000000 +; SI-NEXT: s_bfe_u32 s2, s7, 0xb0014 +; SI-NEXT: s_add_i32 s26, s2, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s26 +; SI-NEXT: s_and_b32 s23, s7, 0x80000000 +; SI-NEXT: s_andn2_b64 s[24:25], s[6:7], s[2:3] +; SI-NEXT: s_cmp_lt_i32 s26, 0 +; SI-NEXT: v_mov_b32_e32 v0, s25 +; SI-NEXT: v_mov_b32_e32 v1, s23 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s26, 51 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s7 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[2:3] +; SI-NEXT: v_mov_b32_e32 v0, s24 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v2, s6 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[2:3] +; SI-NEXT: v_add_f64 v[2:3], s[6:7], -v[0:1] +; SI-NEXT: s_bfe_u32 s2, s5, 0xb0014 +; SI-NEXT: s_add_i32 s24, s2, 0xfffffc01 +; SI-NEXT: s_brev_b32 s23, -2 +; SI-NEXT: v_mov_b32_e32 v4, s7 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s24 +; SI-NEXT: v_bfi_b32 v4, s23, v8, v4 +; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[2:3] +; SI-NEXT: s_and_b32 s2, s5, 0x80000000 +; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0 ; SI-NEXT: s_cmp_lt_i32 s24, 0 -; SI-NEXT: s_cselect_b32 s22, 0, s22 -; SI-NEXT: s_cselect_b32 s23, s25, s23 +; SI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; SI-NEXT: v_mov_b32_e32 v0, s7 +; SI-NEXT: v_mov_b32_e32 v1, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 ; SI-NEXT: s_cmp_gt_i32 s24, 51 -; SI-NEXT: s_cselect_b32 s22, s6, s22 -; SI-NEXT: s_cselect_b32 s23, s7, s23 -; SI-NEXT: v_mov_b32_e32 v0, s22 -; SI-NEXT: v_mov_b32_e32 v1, s23 -; SI-NEXT: v_add_f64 v[0:1], s[6:7], -v[0:1] -; SI-NEXT: s_brev_b32 s6, -2 -; SI-NEXT: v_mov_b32_e32 v2, s7 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: v_bfi_b32 v2, s6, v8, v2 -; SI-NEXT: s_bfe_u32 s7, s5, 0xb0014 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: s_addk_i32 s7, 0xfc01 -; SI-NEXT: v_add_f64 v[2:3], s[22:23], v[0:1] -; SI-NEXT: s_lshr_b64 s[22:23], s[20:21], s7 -; SI-NEXT: s_andn2_b64 s[22:23], s[4:5], s[22:23] -; SI-NEXT: s_and_b32 s24, s5, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s22, 0, s22 -; SI-NEXT: s_cselect_b32 s23, s24, s23 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s22, s4, s22 -; SI-NEXT: s_cselect_b32 s23, s5, s23 -; SI-NEXT: v_mov_b32_e32 v0, s22 -; SI-NEXT: v_mov_b32_e32 v1, s23 -; SI-NEXT: v_add_f64 v[0:1], s[4:5], -v[0:1] -; SI-NEXT: v_mov_b32_e32 v4, s5 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; SI-NEXT: s_bfe_u32 s4, s11, 0xb0014 -; SI-NEXT: v_bfi_b32 v4, s6, v8, v4 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: s_lshr_b64 s[4:5], s[20:21], s7 -; SI-NEXT: v_add_f64 v[0:1], s[22:23], v[0:1] -; SI-NEXT: s_andn2_b64 s[4:5], s[10:11], s[4:5] -; SI-NEXT: s_and_b32 s22, s11, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s22, s5 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s4, s10, s4 -; SI-NEXT: s_cselect_b32 s5, s11, s5 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[2:3] +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, 
vcc ; SI-NEXT: v_mov_b32_e32 v4, s4 -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: v_add_f64 v[4:5], s[10:11], -v[4:5] -; SI-NEXT: v_mov_b32_e32 v6, s11 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[2:3] +; SI-NEXT: v_add_f64 v[4:5], s[4:5], -v[0:1] +; SI-NEXT: s_bfe_u32 s2, s11, 0xb0014 +; SI-NEXT: s_add_i32 s6, s2, 0xfffffc01 +; SI-NEXT: v_mov_b32_e32 v6, s5 ; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; SI-NEXT: v_bfi_b32 v6, s6, v8, v6 +; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s6 +; SI-NEXT: v_bfi_b32 v6, s23, v8, v6 +; SI-NEXT: s_andn2_b64 s[4:5], s[10:11], s[2:3] +; SI-NEXT: s_and_b32 s2, s11, 0x80000000 ; SI-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc ; SI-NEXT: v_mov_b32_e32 v4, 0 -; SI-NEXT: v_add_f64 v[6:7], s[4:5], v[4:5] -; SI-NEXT: s_bfe_u32 s4, s9, 0xb0014 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[4:5], s[20:21], s7 -; SI-NEXT: s_andn2_b64 s[4:5], s[8:9], s[4:5] -; SI-NEXT: s_and_b32 s10, s9, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s10, s5 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s4, s8, s4 -; SI-NEXT: s_cselect_b32 s5, s9, s5 +; SI-NEXT: s_cmp_lt_i32 s6, 0 +; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; SI-NEXT: v_mov_b32_e32 v4, s5 +; SI-NEXT: v_mov_b32_e32 v5, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s6, 51 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s11 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[2:3] ; SI-NEXT: v_mov_b32_e32 v4, s4 -; SI-NEXT: v_mov_b32_e32 v5, s5 -; SI-NEXT: v_add_f64 v[4:5], s[8:9], -v[4:5] -; SI-NEXT: v_mov_b32_e32 v9, s9 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; SI-NEXT: v_bfi_b32 v9, s6, v8, v9 -; SI-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc -; SI-NEXT: v_mov_b32_e32 v4, 0 -; SI-NEXT: v_add_f64 v[4:5], s[4:5], v[4:5] -; SI-NEXT: s_bfe_u32 s4, s15, 0xb0014 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[4:5], s[20:21], s7 -; SI-NEXT: s_andn2_b64 s[4:5], s[14:15], s[4:5] -; SI-NEXT: s_and_b32 s8, s15, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s8, s5 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s5, s15, s5 -; SI-NEXT: s_cselect_b32 s4, s14, s4 -; SI-NEXT: v_mov_b32_e32 v10, s5 -; SI-NEXT: v_mov_b32_e32 v9, s4 -; SI-NEXT: v_add_f64 v[9:10], s[14:15], -v[9:10] -; SI-NEXT: v_mov_b32_e32 v11, s15 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v6, s10 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[2:3] +; SI-NEXT: v_add_f64 v[6:7], s[10:11], -v[4:5] +; SI-NEXT: s_bfe_u32 s2, s9, 0xb0014 +; SI-NEXT: s_add_i32 s6, s2, 0xfffffc01 +; SI-NEXT: v_mov_b32_e32 v9, s11 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 +; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s6 +; SI-NEXT: v_bfi_b32 v9, s23, v8, v9 +; SI-NEXT: s_andn2_b64 s[4:5], s[8:9], s[2:3] +; SI-NEXT: s_and_b32 s2, s9, 0x80000000 +; SI-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc +; SI-NEXT: v_mov_b32_e32 v6, 0 +; SI-NEXT: s_cmp_lt_i32 s6, 0 +; SI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] +; SI-NEXT: v_mov_b32_e32 v4, s5 +; SI-NEXT: v_mov_b32_e32 v5, s2 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s6, 51 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s9 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[2:3] +; SI-NEXT: v_mov_b32_e32 v4, s4 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v9, s8 +; SI-NEXT: 
v_cndmask_b32_e64 v4, v4, v9, s[2:3] +; SI-NEXT: s_bfe_u32 s2, s15, 0xb0014 +; SI-NEXT: v_add_f64 v[9:10], s[8:9], -v[4:5] +; SI-NEXT: s_add_i32 s4, s2, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s4 +; SI-NEXT: v_mov_b32_e32 v11, s9 ; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[9:10]|, 0.5 -; SI-NEXT: v_bfi_b32 v11, s6, v8, v11 +; SI-NEXT: s_andn2_b64 s[24:25], s[14:15], s[2:3] +; SI-NEXT: s_and_b32 s2, s15, 0x80000000 +; SI-NEXT: v_bfi_b32 v11, s23, v8, v11 +; SI-NEXT: s_cmp_lt_i32 s4, 0 ; SI-NEXT: v_cndmask_b32_e32 v10, 0, v11, vcc ; SI-NEXT: v_mov_b32_e32 v9, 0 -; SI-NEXT: v_add_f64 v[10:11], s[4:5], v[9:10] +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: s_cmp_gt_i32 s4, 51 +; SI-NEXT: v_add_f64 v[4:5], v[4:5], v[9:10] +; SI-NEXT: v_mov_b32_e32 v10, s2 +; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 ; SI-NEXT: s_bfe_u32 s4, s13, 0xb0014 -; SI-NEXT: s_add_i32 s7, s4, 0xfffffc01 -; SI-NEXT: s_lshr_b64 s[4:5], s[20:21], s7 -; SI-NEXT: s_andn2_b64 s[4:5], s[12:13], s[4:5] -; SI-NEXT: s_and_b32 s8, s13, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s4, 0, s4 -; SI-NEXT: s_cselect_b32 s5, s8, s5 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s5, s13, s5 -; SI-NEXT: s_cselect_b32 s4, s12, s4 -; SI-NEXT: s_bfe_u32 s7, s19, 0xb0014 -; SI-NEXT: s_addk_i32 s7, 0xfc01 -; SI-NEXT: s_lshr_b64 s[8:9], s[20:21], s7 -; SI-NEXT: s_andn2_b64 s[8:9], s[18:19], s[8:9] -; SI-NEXT: s_and_b32 s10, s19, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: s_cselect_b32 s8, 0, s8 -; SI-NEXT: s_cselect_b32 s9, s10, s9 -; SI-NEXT: s_cmp_gt_i32 s7, 51 -; SI-NEXT: s_cselect_b32 s9, s19, s9 -; SI-NEXT: s_cselect_b32 s8, s18, s8 -; SI-NEXT: s_bfe_u32 s7, s17, 0xb0014 -; SI-NEXT: v_mov_b32_e32 v13, s5 -; SI-NEXT: s_addk_i32 s7, 0xfc01 -; SI-NEXT: v_mov_b32_e32 v12, s4 -; SI-NEXT: s_lshr_b64 s[10:11], s[20:21], s7 -; SI-NEXT: v_add_f64 v[12:13], s[12:13], -v[12:13] -; SI-NEXT: s_andn2_b64 s[10:11], s[16:17], s[10:11] -; SI-NEXT: s_and_b32 s12, s17, 0x80000000 -; SI-NEXT: s_cmp_lt_i32 s7, 0 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[12:13]|, 0.5 -; SI-NEXT: v_mov_b32_e32 v13, s9 -; SI-NEXT: s_cselect_b32 s10, 0, s10 -; SI-NEXT: s_cselect_b32 s11, s12, s11 -; SI-NEXT: s_cmp_gt_i32 s7, 51 +; SI-NEXT: s_add_i32 s6, s4, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[4:5], s[20:21], s6 +; SI-NEXT: s_andn2_b64 s[26:27], s[12:13], s[4:5] +; SI-NEXT: s_and_b32 s4, s13, 0x80000000 +; SI-NEXT: v_mov_b32_e32 v9, s25 +; SI-NEXT: s_cmp_lt_i32 s6, 0 +; SI-NEXT: v_cndmask_b32_e32 v15, v9, v10, vcc +; SI-NEXT: v_mov_b32_e32 v10, s4 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_cmp_gt_i32 s6, 51 +; SI-NEXT: s_cselect_b64 s[6:7], -1, 0 +; SI-NEXT: s_bfe_u32 s8, s19, 0xb0014 +; SI-NEXT: s_add_i32 s10, s8, 0xfffffc01 +; SI-NEXT: s_lshr_b64 s[8:9], s[20:21], s10 +; SI-NEXT: s_andn2_b64 s[28:29], s[18:19], s[8:9] +; SI-NEXT: s_and_b32 s8, s19, 0x80000000 +; SI-NEXT: v_mov_b32_e32 v9, s27 +; SI-NEXT: s_cmp_lt_i32 s10, 0 +; SI-NEXT: v_cndmask_b32_e64 v17, v9, v10, s[4:5] +; SI-NEXT: v_mov_b32_e32 v9, s29 +; SI-NEXT: v_mov_b32_e32 v10, s8 +; SI-NEXT: s_cselect_b64 s[8:9], -1, 0 +; SI-NEXT: s_cmp_gt_i32 s10, 51 +; SI-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[8:9] +; SI-NEXT: v_mov_b32_e32 v10, s19 +; SI-NEXT: s_cselect_b64 s[10:11], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v10, v9, v10, s[10:11] +; SI-NEXT: v_mov_b32_e32 v9, s28 +; SI-NEXT: v_cndmask_b32_e64 v9, v9, 0, s[8:9] +; SI-NEXT: v_mov_b32_e32 v11, s18 +; SI-NEXT: s_bfe_u32 s8, s17, 0xb0014 +; SI-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[10:11] +; SI-NEXT: s_add_i32 s10, s8, 
0xfffffc01 +; SI-NEXT: s_lshr_b64 s[8:9], s[20:21], s10 +; SI-NEXT: s_andn2_b64 s[20:21], s[16:17], s[8:9] +; SI-NEXT: s_and_b32 s8, s17, 0x80000000 +; SI-NEXT: s_cmp_lt_i32 s10, 0 +; SI-NEXT: v_mov_b32_e32 v11, s21 ; SI-NEXT: v_mov_b32_e32 v12, s8 -; SI-NEXT: s_cselect_b32 s11, s17, s11 -; SI-NEXT: v_mov_b32_e32 v9, s13 -; SI-NEXT: v_add_f64 v[12:13], s[18:19], -v[12:13] -; SI-NEXT: s_cselect_b32 s10, s16, s10 -; SI-NEXT: v_mov_b32_e32 v15, s11 -; SI-NEXT: v_bfi_b32 v9, s6, v8, v9 -; SI-NEXT: v_mov_b32_e32 v14, s10 -; SI-NEXT: v_cndmask_b32_e32 v17, 0, v9, vcc -; SI-NEXT: v_mov_b32_e32 v9, s19 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[12:13]|, 0.5 -; SI-NEXT: v_add_f64 v[14:15], s[16:17], -v[14:15] -; SI-NEXT: v_bfi_b32 v9, s6, v8, v9 -; SI-NEXT: v_cndmask_b32_e32 v13, 0, v9, vcc -; SI-NEXT: v_mov_b32_e32 v9, s17 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[14:15]|, 0.5 -; SI-NEXT: v_bfi_b32 v8, s6, v8, v9 -; SI-NEXT: v_mov_b32_e32 v12, 0 -; SI-NEXT: v_cndmask_b32_e32 v9, 0, v8, vcc -; SI-NEXT: v_mov_b32_e32 v8, 0 -; SI-NEXT: v_mov_b32_e32 v16, 0 -; SI-NEXT: v_add_f64 v[14:15], s[8:9], v[12:13] -; SI-NEXT: v_add_f64 v[12:13], s[10:11], v[8:9] -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: v_add_f64 v[8:9], s[4:5], v[16:17] -; SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_cselect_b64 s[8:9], -1, 0 +; SI-NEXT: s_cmp_gt_i32 s10, 51 +; SI-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[8:9] +; SI-NEXT: v_mov_b32_e32 v12, s17 +; SI-NEXT: s_cselect_b64 s[10:11], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v14, v11, v12, s[10:11] +; SI-NEXT: v_mov_b32_e32 v11, s20 +; SI-NEXT: v_cndmask_b32_e64 v11, v11, 0, s[8:9] +; SI-NEXT: v_mov_b32_e32 v12, s16 +; SI-NEXT: v_cndmask_b32_e64 v13, v11, v12, s[10:11] +; SI-NEXT: v_add_f64 v[11:12], s[16:17], -v[13:14] +; SI-NEXT: v_mov_b32_e32 v19, s17 +; SI-NEXT: v_cmp_ge_f64_e64 s[8:9], |v[11:12]|, 0.5 +; SI-NEXT: v_mov_b32_e32 v11, s19 +; SI-NEXT: v_bfi_b32 v20, s23, v8, v11 +; SI-NEXT: v_add_f64 v[11:12], s[18:19], -v[9:10] +; SI-NEXT: v_bfi_b32 v19, s23, v8, v19 +; SI-NEXT: v_cmp_ge_f64_e64 s[10:11], |v[11:12]|, 0.5 +; SI-NEXT: v_mov_b32_e32 v11, 0 +; SI-NEXT: v_cndmask_b32_e64 v12, 0, v20, s[10:11] +; SI-NEXT: v_add_f64 v[11:12], v[9:10], v[11:12] +; SI-NEXT: v_cndmask_b32_e64 v10, 0, v19, s[8:9] +; SI-NEXT: v_mov_b32_e32 v9, 0 +; SI-NEXT: v_mov_b32_e32 v16, s15 +; SI-NEXT: v_add_f64 v[9:10], v[13:14], v[9:10] +; SI-NEXT: v_mov_b32_e32 v13, s24 +; SI-NEXT: v_cndmask_b32_e64 v14, v15, v16, s[2:3] +; SI-NEXT: v_cndmask_b32_e64 v13, v13, 0, vcc +; SI-NEXT: v_mov_b32_e32 v15, s14 +; SI-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[2:3] +; SI-NEXT: v_mov_b32_e32 v15, s15 +; SI-NEXT: v_bfi_b32 v19, s23, v8, v15 +; SI-NEXT: v_mov_b32_e32 v15, s26 +; SI-NEXT: v_mov_b32_e32 v18, s13 +; SI-NEXT: v_cndmask_b32_e64 v15, v15, 0, s[4:5] +; SI-NEXT: v_mov_b32_e32 v16, s12 +; SI-NEXT: v_cndmask_b32_e64 v18, v17, v18, s[6:7] +; SI-NEXT: v_cndmask_b32_e64 v17, v15, v16, s[6:7] +; SI-NEXT: v_mov_b32_e32 v15, s13 +; SI-NEXT: v_bfi_b32 v8, s23, v8, v15 +; SI-NEXT: v_add_f64 v[15:16], s[12:13], -v[17:18] +; SI-NEXT: s_load_dwordx2 s[20:21], s[0:1], 0x9 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[15:16]|, 0.5 +; SI-NEXT: v_add_f64 v[15:16], s[14:15], -v[13:14] +; SI-NEXT: s_mov_b32 s23, 0xf000 +; SI-NEXT: v_cmp_ge_f64_e64 s[0:1], |v[15:16]|, 0.5 +; SI-NEXT: v_mov_b32_e32 v15, 0 +; SI-NEXT: 
v_cndmask_b32_e64 v16, 0, v19, s[0:1] +; SI-NEXT: v_add_f64 v[15:16], v[13:14], v[15:16] +; SI-NEXT: v_cndmask_b32_e32 v14, 0, v8, vcc +; SI-NEXT: v_mov_b32_e32 v13, 0 +; SI-NEXT: v_add_f64 v[13:14], v[17:18], v[13:14] +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[20:23], 0 offset:48 +; SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[20:23], 0 offset:32 +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[20:23], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[20:23], 0 ; SI-NEXT: s_endpgm ; ; CI-LABEL: round_v8f64: diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll index 10878248f46be..6c654b2fcb793 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll @@ -3037,17 +3037,17 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa ; GCN-HSA-NEXT: s_lshr_b32 s35, s8, 16 ; GCN-HSA-NEXT: s_lshr_b32 s37, s11, 16 ; GCN-HSA-NEXT: s_lshr_b32 s38, s10, 16 -; GCN-HSA-NEXT: s_lshr_b32 s39, s13, 16 -; GCN-HSA-NEXT: s_lshr_b32 s40, s12, 16 -; GCN-HSA-NEXT: s_lshr_b32 s41, s15, 16 -; GCN-HSA-NEXT: s_lshr_b32 s42, s14, 16 +; GCN-HSA-NEXT: s_lshr_b32 s40, s13, 16 +; GCN-HSA-NEXT: s_lshr_b32 s41, s12, 16 +; GCN-HSA-NEXT: s_lshr_b32 s42, s15, 16 +; GCN-HSA-NEXT: s_lshr_b32 s43, s14, 16 ; GCN-HSA-NEXT: s_and_b32 s25, s1, 0xffff ; GCN-HSA-NEXT: s_and_b32 s27, s0, 0xffff ; GCN-HSA-NEXT: s_and_b32 s29, s3, 0xffff ; GCN-HSA-NEXT: s_and_b32 s31, s2, 0xffff ; GCN-HSA-NEXT: s_and_b32 s34, s5, 0xffff ; GCN-HSA-NEXT: s_and_b32 s36, s4, 0xffff -; GCN-HSA-NEXT: s_and_b32 s43, s7, 0xffff +; GCN-HSA-NEXT: s_and_b32 s39, s7, 0xffff ; GCN-HSA-NEXT: s_and_b32 s44, s6, 0xffff ; GCN-HSA-NEXT: s_and_b32 s45, s9, 0xffff ; GCN-HSA-NEXT: s_and_b32 s46, s8, 0xffff @@ -3172,13 +3172,13 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa ; GCN-HSA-NEXT: v_mov_b32_e32 v8, s52 ; GCN-HSA-NEXT: v_mov_b32_e32 v7, s18 ; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v9, s42 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s43 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s50 ; GCN-HSA-NEXT: v_mov_b32_e32 v10, s51 -; GCN-HSA-NEXT: v_mov_b32_e32 v11, s41 -; GCN-HSA-NEXT: v_mov_b32_e32 v1, s40 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s42 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, s41 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s49 -; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, s40 ; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[20:23] ; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] ; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] @@ -3206,7 +3206,7 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa ; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s44 ; GCN-HSA-NEXT: v_mov_b32_e32 v1, s30 -; GCN-HSA-NEXT: v_mov_b32_e32 v2, s43 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 ; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 ; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] @@ -6181,129 +6181,129 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-HSA-NEXT: s_lshr_b32 s33, s4, 16 ; GCN-HSA-NEXT: s_lshr_b32 s34, s2, 16 ; GCN-HSA-NEXT: s_lshr_b32 s18, s0, 16 -; GCN-HSA-NEXT: s_and_b32 s35, s0, 0xffff -; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff +; GCN-HSA-NEXT: s_and_b32 s0, s0, 0xffff +; GCN-HSA-NEXT: s_and_b32 s35, s2, 0xffff ; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff ; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff ; 
GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff ; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff ; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff ; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff -; GCN-HSA-NEXT: s_and_b32 s36, s1, 0xffff -; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff +; GCN-HSA-NEXT: s_and_b32 s1, s1, 0xffff +; GCN-HSA-NEXT: s_and_b32 s36, s3, 0xffff ; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff ; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff ; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff ; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff ; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff ; GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xf0 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xd0 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xb0 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x90 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 -; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xf0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xd0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xb0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x90 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s15 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x70 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s13 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 ; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 ; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s9 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s23 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x50 ; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 48 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 +; GCN-HSA-NEXT: 
v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 16 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xe0 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xe0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s1 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xc0 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xc0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xa0 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0xa0 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s28 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x80 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x80 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 0x60 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 64 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 -; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCN-HSA-NEXT: s_add_u32 s2, s16, 32 ; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s33 -; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 +; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35 ; 
GCN-HSA-NEXT: v_mov_b32_e32 v2, s34 -; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 -; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 ; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 ; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 ; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] @@ -6589,17 +6589,17 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s12, 16 ; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16 ; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s8, 16 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[48:49], s[20:21], 0x100000 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[18:19], 0x100000 -; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s6, 16 -; GCN-NOHSA-SI-NEXT: s_lshr_b32 s54, s4, 16 -; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s2, 16 -; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s0, 16 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[20:21], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[52:53], s[18:19], 0x100000 +; GCN-NOHSA-SI-NEXT: s_lshr_b32 s54, s6, 16 +; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s4, 16 +; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s2, 16 +; GCN-NOHSA-SI-NEXT: s_lshr_b32 s60, s0, 16 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[4:5], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x100000 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[60:61], s[8:9], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[48:49], s[8:9], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[62:63], s[10:11], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[64:65], s[12:13], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[66:67], s[14:15], 0x100000 @@ -6613,12 +6613,12 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s51 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s52 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s53 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s48 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s49 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s50 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s51 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s2 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s3 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 @@ -6647,10 +6647,10 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s4 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s5 ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[58:59], 0x100000 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[56:57], 0x100000 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[54:55], 0x100000 -; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[52:53], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[60:61], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[58:59], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[56:57], 0x100000 +; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[54:55], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[30:31], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[28:29], 0x100000 ; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[26:27], 0x100000 @@ -6678,8 +6678,8 @@ define amdgpu_kernel void 
@constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s65 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s63 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s61 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s48 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s49 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s34 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s35 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s24 @@ -6923,123 +6923,123 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspa ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NOHSA-VI-NEXT: s_mov_b32 s36, s15 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s38, s13 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s38, s15 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s40, s13 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[82:83], s[14:15], 48 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s40, s11 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s3 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s50, s1 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s2, 16 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s0, 16 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s11 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s50, s3 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s52, s1 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s2, 16 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s68, s0, 16 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 -; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[68:69], s[0:1], 48 +; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[36:37], s[0:1], 48 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[70:71], s[2:3], 48 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[80:81], s[12:13], 48 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s16 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s17 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x100000 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s82 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s83 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s9 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s44, s9 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[78:79], s[10:11], 48 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s44, s7 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s46, s7 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s80 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s81 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[76:77], s[8:9], 48 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s46, s5 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s5 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42 +; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v1, s43 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s78 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s79 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[74:75], s[6:7], 48 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 ; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[72:73], s[4:5], 48 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s76 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s77 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s74 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s75 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s14, 16 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s14, 16 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s72 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s73 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s12, 16 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s12, 16 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s70 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s71 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s10, 16 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s10, 16 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s52 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s53 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s36 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s37 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[54:55], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[56:57], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s8, 16 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s8, 16 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52 -; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s54 +; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v3, s55 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x100000 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[56:57], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[58:59], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s6, 16 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s6, 16 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[58:59], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[60:61], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 -; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s4, 16 +; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s4, 16 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[60:61], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 ; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[62:63], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[64:65], 0x100000 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[64:65], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[66:67], 0x100000 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 -; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x100000 +; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[68:69], 0x100000 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 ; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index 8eeee8d653afb..817683b46307b 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -7307,7 +7307,8 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace ; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v3 +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v21, 0xffff, v3 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v5 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v5 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v6 @@ -7348,6 +7349,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v1 +; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, v21 ; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 diff --git a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll index 86bf754ecfcf0..407a4e5f1b764 100644 --- a/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/load-select-ptr.ll @@ -9,8 +9,8 @@ ; GCN: s_load_dwordx2 ; GCN: s_cmp_eq_u32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32 +; GCN: v_cndmask_b32 ; GCN-NOT: load_dword ; GCN: flat_load_dwordx2 @@ -35,8 +35,8 @@ define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], i64* % ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32 +; GCN: v_cndmask_b32 ; GCN: flat_store_dwordx2 define amdgpu_kernel void @select_ptr_crash_i64_global(i32 %tmp, [8 x i32], i64 addrspace(1)* %ptr0, [8 x i32], i64 addrspace(1)* %ptr1, [8 x i32], i64 addrspace(1)* %ptr2) { %tmp2 = icmp eq i32 %tmp, 0 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index 2077c6db32997..889636895cd56 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -141,11 +141,11 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) { ; MUBUF-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:4 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe00000 -; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6 ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc ; MUBUF-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; MUBUF-NEXT: s_waitcnt vmcnt(0) +; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -180,11 +180,11 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out) { ; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:64 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_addk_i32 s32, 0x8000 -; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc ; FLATSCR-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir index 6a2742a772bf3..f8d3159689450 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir @@ -16,14 +16,14 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]] - ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY3]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -32,27 +32,27 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_MOV_B64_term]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[S_MOV_B64_term]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[PRED_COPY4]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[COPY5]], implicit $exec - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY1]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[PRED_COPY5]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY66:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]] - ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %10:vreg_64, [[COPY66]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[COPY66]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY8]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY66:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY6]] + ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %10:vreg_64, [[PRED_COPY66]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY66]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY7]] + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] + ; 
CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY8]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY8]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -61,7 +61,7 @@ body: | successors: %bb.2(0x40000000), %bb.1(0x40000000) liveins: $vgpr0 - %0:vgpr_32 = COPY killed $vgpr0 + %0:vgpr_32 = PRED_COPY killed $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %0, implicit $exec %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -80,7 +80,7 @@ body: | %9:vgpr_32 = PHI %8, %bb.1, %7, %bb.2, %1, %bb.0 GLOBAL_STORE_DWORD undef %10:vreg_64, %9, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - %7:vgpr_32 = COPY killed %9 + %7:vgpr_32 = PRED_COPY killed %9 %5:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.2 @@ -97,14 +97,14 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B32_e32_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY3]], implicit-def dead $scc + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[PRED_COPY]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_MOV_B32_e32_]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY3]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -113,33 +113,33 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_MOV_B64_term]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY1]] + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[S_MOV_B64_term]] ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: 
%bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY5]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY6]], implicit-def $scc + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sreg_64_xexec = PRED_COPY killed [[PRED_COPY5]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[PRED_COPY6]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[COPY4]], implicit $exec - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[PRED_COPY4]], implicit $exec + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY77:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] - ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %11:vreg_64, [[COPY77]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY77]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY8]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY9]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY77:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] + ; CHECK-NEXT: GLOBAL_STORE_DWORD undef %11:vreg_64, [[PRED_COPY77]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY77]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY8]] + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY9]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -148,7 +148,7 @@ body: | successors: %bb.3(0x40000000), %bb.1(0x40000000) liveins: $vgpr0 - %0:vgpr_32 = COPY killed $vgpr0 + %0:vgpr_32 = PRED_COPY killed $vgpr0 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %0, implicit $exec %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -164,7 +164,7 @@ body: | bb.2: successors: %bb.3(0x80000000) - %8:sreg_64_xexec = COPY %4 + %8:sreg_64_xexec = PRED_COPY %4 SI_END_CF killed %8, implicit-def $exec, implicit-def dead $scc, implicit $exec %9:vgpr_32 = nsw V_ADD_U32_e32 1, killed %6, implicit $exec @@ -173,7 +173,7 @@ body: | %10:vgpr_32 = PHI %9, %bb.2, %7, %bb.3, %1, %bb.0 GLOBAL_STORE_DWORD undef %11:vreg_64, %10, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1) - %7:vgpr_32 = COPY killed %10 + %7:vgpr_32 = PRED_COPY killed 
%10 %5:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir index 95a96b5d7d886..8895de5488978 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -21,9 +21,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -68,9 +68,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec @@ -209,9 +209,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead 
$scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec @@ -221,23 +221,23 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] - ; CHECK-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] + ; CHECK-NEXT: dead [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]] - ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] + ; CHECK-NEXT: $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_SLEEP 1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY1]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[PRED_COPY1]], implicit-def dead $scc ; CHECK-NEXT: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; CHECK-NEXT: [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir index 028d511c6bf86..8ae8b3612d3c2 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir @@ -55,7 +55,7 @@ body: | ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3 ; CHECK-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY [[S_ANDN2_B64_]] ; CHECK-NEXT: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; CHECK-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF @@ -67,25 +67,25 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit 
$flat_scr :: (load (s32), addrspace 1) ; CHECK-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]] - ; CHECK-NEXT: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY7]], implicit $exec + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_7]] + ; CHECK-NEXT: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 killed [[FLAT_LOAD_DWORD1]], killed [[COPY6]], implicit $exec ; CHECK-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[V_LSHR_B32_e32_]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[PHI1]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY7]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[PHI1]] - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[PHI1]] - ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY9]], implicit-def dead $scc + ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 killed [[S_AND_B64_]], [[COPY8]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[COPY6]], $exec, implicit-def $scc + ; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PRED_COPY]], $exec, implicit-def $scc ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_OR_B64_]], $exec, implicit-def $scc ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x00000000), %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[PRED_COPY]], %bb.1, [[S_OR_B64_1]], %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.1, [[S_MOV_B64_2]], %bb.2 ; CHECK-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll index 53020b0080b2c..1e3e8fdfb546c 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -17,7 +17,7 @@ define amdgpu_kernel void @kern_noargs() { define amdgpu_kernel void @kern_i8(i8 %arg) #0 { ; HSA-LABEL: @kern_i8( -; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -26,7 +26,7 @@ define 
amdgpu_kernel void @kern_i8(i8 %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i8( -; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -40,7 +40,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 { define amdgpu_kernel void @kern_i16(i16 %arg) #0 { ; HSA-LABEL: @kern_i16( -; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -49,7 +49,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i16( -; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -63,7 +63,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 { define amdgpu_kernel void @kern_f16(half %arg) #0 { ; HSA-LABEL: @kern_f16( -; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -73,7 +73,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_f16( -; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 
dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -88,7 +88,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 { define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { ; HSA-LABEL: @kern_zeroext_i8( -; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -97,7 +97,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_zeroext_i8( -; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -111,7 +111,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { ; HSA-LABEL: @kern_zeroext_i16( -; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -120,7 +120,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_zeroext_i16( -; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: 
[[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -134,7 +134,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { ; HSA-LABEL: @kern_signext_i8( -; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -143,7 +143,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_signext_i8( -; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -157,7 +157,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { ; HSA-LABEL: @kern_signext_i16( -; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -166,7 +166,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_signext_i16( -; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* 
[[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -180,7 +180,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { ; HSA-LABEL: @kern_i8_i8( -; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -195,7 +195,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i8_i8( -; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -216,7 +216,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { ; HSA-LABEL: @kern_v3i8( -; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -226,7 +226,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v3i8( -; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, 
i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -241,7 +241,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { define amdgpu_kernel void @kern_i24(i24 %arg0) { ; HSA-LABEL: @kern_i24( -; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -250,7 +250,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i24( -; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -264,7 +264,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) { define amdgpu_kernel void @kern_i32(i32 %arg0) { ; HSA-LABEL: @kern_i32( -; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -272,7 +272,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i32( -; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -285,7 +285,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) { define amdgpu_kernel void @kern_f32(float %arg0) { ; HSA-LABEL: @kern_f32( -; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 
+; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -293,7 +293,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_f32( -; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -306,7 +306,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) { define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { ; HSA-LABEL: @kern_v3i32( -; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -315,7 +315,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v3i32( -; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { ; HSA-LABEL: @kern_v8i32( -; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* 
[[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -337,7 +337,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v8i32( -; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -350,7 +350,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { ; HSA-LABEL: @kern_v8i64( -; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -358,7 +358,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_v8i64( -; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -371,7 +371,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { ; HSA-LABEL: @kern_v16i64( -; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* ; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -379,7 +379,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: 
@kern_v16i64( -; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* ; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -392,7 +392,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { ; HSA-LABEL: @kern_i32_v3i32( -; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -405,7 +405,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_i32_v3i32( -; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -427,7 +427,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { ; HSA-LABEL: @kern_struct_a( -; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -435,7 +435,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_struct_a( -; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = 
call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -448,7 +448,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 { ; HSA-LABEL: @kern_struct_b_packed( -; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -456,7 +456,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_struct_b_packed( -; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -511,7 +511,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { ; HSA-LABEL: @kern_lds_ptr( -; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* ; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -519,7 +519,7 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr( -; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull 
align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* ; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -532,12 +532,12 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { ; HSA-LABEL: @kern_lds_ptr_si( -; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr_si( -; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 ; MESA-NEXT: ret void ; @@ -547,7 +547,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_i8( -; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -562,7 +562,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_i8( -; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -583,7 +583,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 { ; HSA-LABEL: @kern_realign_i8_i8_i8( -; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 
16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -604,7 +604,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) # ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_i8_i8( -; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -632,7 +632,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) # define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 { ; HSA-LABEL: @kern_realign_i8_i8_i8_i8( -; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -659,7 +659,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_i8_i8_i8( -; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -694,7 +694,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2 define amdgpu_kernel void @kern_realign_i8_v3i8(i8 
%arg0, <3 x i8> %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_v3i8( -; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -709,7 +709,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_v3i8( -; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -730,7 +730,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { ; HSA-LABEL: @kern_realign_i8_i16( -; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -745,7 +745,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_i16( -; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -766,7 +766,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { 
define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_i1( -; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -781,7 +781,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i1_i1( -; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -802,7 +802,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 { ; HSA-LABEL: @kern_realign_i1_i1_i1( -; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -823,7 +823,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) # ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i1_i1_i1( -; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -851,7 +851,7 @@ define amdgpu_kernel void 
@kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) # define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 { ; HSA-LABEL: @kern_realign_i1_i1_i1_i1( -; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -878,7 +878,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i1_i1_i1_i1( -; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -913,7 +913,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2 define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_v3i1( -; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -929,7 +929,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i1_v3i1( -; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 
addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -951,7 +951,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { ; HSA-LABEL: @kern_realign_i1_i16( -; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i1_i16( -; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -987,7 +987,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 { ; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( -; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -1032,7 +1032,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( -; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 
addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -1088,7 +1088,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { ; HSA-LABEL: @kern_realign_f16_f16( -; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 @@ -1105,7 +1105,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_realign_f16_f16( -; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* ; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 @@ -1128,7 +1128,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 { ; HSA-LABEL: @kern_global_ptr( -; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1136,7 +1136,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_global_ptr( -; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = 
getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1149,7 +1149,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 { define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 { ; HSA-LABEL: @kern_global_ptr_dereferencable( -; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1 @@ -1157,7 +1157,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_global_ptr_dereferencable( -; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1 @@ -1170,7 +1170,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 { ; HSA-LABEL: @kern_global_ptr_dereferencable_or_null( -; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2 @@ -1178,7 +1178,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1 ; HSA-NEXT: ret void ; ; MESA-LABEL: 
@kern_global_ptr_dereferencable_or_null( -; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2 @@ -1191,7 +1191,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1 define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 { ; HSA-LABEL: @kern_nonnull_global_ptr( -; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0 @@ -1199,7 +1199,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_nonnull_global_ptr( -; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0 @@ -1212,7 +1212,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 { ; HSA-LABEL: @kern_align32_global_ptr( -; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[PTR_LOAD:%.*]] = 
load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3 @@ -1220,7 +1220,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 % ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_align32_global_ptr( -; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3 @@ -1233,12 +1233,12 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 % define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 { ; HSA-LABEL: @kern_noalias_global_ptr( -; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_noalias_global_ptr( -; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8 ; MESA-NEXT: ret void ; @@ -1248,13 +1248,13 @@ define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 { ; HSA-LABEL: @kern_noalias_global_ptr_x2( -; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8 ; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_noalias_global_ptr_x2( -; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8 ; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* 
addrspace(1)* undef, align 8 ; MESA-NEXT: ret void @@ -1267,7 +1267,7 @@ define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias % define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { ; HSA-LABEL: @struct_i8_i8_arg( ; HSA-NEXT: entry: -; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1279,7 +1279,7 @@ define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { ; ; MESA-LABEL: @struct_i8_i8_arg( ; MESA-NEXT: entry: -; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1300,7 +1300,7 @@ entry: define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { ; HSA-LABEL: @struct_i8_i16_arg( ; HSA-NEXT: entry: -; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* ; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1312,7 +1312,7 @@ define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { ; ; MESA-LABEL: @struct_i8_i16_arg( ; MESA-NEXT: entry: -; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* ; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1333,7 +1333,7 @@ entry: define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { ; HSA-LABEL: @array_2xi8_arg( ; 
HSA-NEXT: entry: -; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1345,7 +1345,7 @@ define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { ; ; MESA-LABEL: @array_2xi8_arg( ; MESA-NEXT: entry: -; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1366,7 +1366,7 @@ entry: define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { ; HSA-LABEL: @array_2xi1_arg( ; HSA-NEXT: entry: -; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* ; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1378,7 +1378,7 @@ define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { ; ; MESA-LABEL: @array_2xi1_arg( ; MESA-NEXT: entry: -; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* ; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1398,11 +1398,11 @@ entry: define amdgpu_kernel void @only_empty_struct({} %empty) #0 { ; HSA-LABEL: @only_empty_struct( -; HSA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: ret void ; ; 
MESA-LABEL: @only_empty_struct( -; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: ret void ; ret void @@ -1410,7 +1410,7 @@ define amdgpu_kernel void @only_empty_struct({} %empty) #0 { define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { ; HSA-LABEL: @empty_struct_with_other( -; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1418,7 +1418,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { ; HSA-NEXT: ret void ; ; MESA-LABEL: @empty_struct_with_other( -; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1433,7 +1433,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) { ; HSA-LABEL: @static_alloca_kern_i32( ; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) -; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1442,7 +1442,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) { ; ; MESA-LABEL: @static_alloca_kern_i32( ; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) -; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* 
@llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1459,7 +1459,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) { define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) { ; HSA-LABEL: @dyn_alloca_kernarg_i32( ; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) -; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1470,7 +1470,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) { ; ; MESA-LABEL: @dyn_alloca_kernarg_i32( ; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) -; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1489,7 +1489,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) { ; Byref pointers should only be treated as offsets from kernarg define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) { ; HSA-LABEL: @byref_constant_i8_arg( -; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1500,7 +1500,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_i8_arg( -; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: 
[[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1518,7 +1518,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) %in.byref) { ; HSA-LABEL: @byref_constant_i16_arg( -; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1530,7 +1530,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_i16_arg( -; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1549,7 +1549,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in.byref, i32 %after.offset) { ; HSA-LABEL: @byref_constant_i32_arg( -; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1564,7 +1564,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; 
HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_i32_arg( -; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1586,7 +1586,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) %in.byref, i32 %after.offset) { ; HSA-LABEL: @byref_constant_v4i32_arg( -; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(96) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1602,7 +1602,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_v4i32_arg( -; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(92) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1626,7 +1626,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { ; HSA-LABEL: @byref_align_constant_i32_arg( -; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: 
[[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1641,7 +1641,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_align_constant_i32_arg( -; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1663,7 +1663,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) %in.byref, i32 %after.offset) { ; HSA-LABEL: @byref_natural_align_constant_v16i32_arg( -; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(192) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1679,7 +1679,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_natural_align_constant_v16i32_arg( -; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(188) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1704,7 +1704,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; Also accept byref kernel arguments with other global address spaces. 
define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) %in.byref) { ; HSA-LABEL: @byref_global_i32_arg( -; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1715,7 +1715,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_global_i32_arg( -; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1732,7 +1732,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) %in.byref) { ; HSA-LABEL: @byref_flat_i32_arg( -; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1743,7 +1743,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_flat_i32_arg( -; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* 
[[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1760,7 +1760,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) %in.byref) { ; HSA-LABEL: @byref_constant_32bit_i32_arg( -; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1771,7 +1771,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_32bit_i32_arg( -; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1788,7 +1788,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) %in.byref) { ; HSA-LABEL: @byref_unknown_as_i32_arg( -; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1799,7 +1799,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_unknown_as_i32_arg( -; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 
addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1817,7 +1817,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; Invalid, but should not crash. define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) %in.byref) { ; HSA-LABEL: @byref_local_i32_arg( -; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1828,7 +1828,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_local_i32_arg( -; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1845,7 +1845,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in0.byref, i32 addrspace(4)* byref(i32) %in1.byref, i32 %after.offset) { ; HSA-LABEL: @multi_byref_constant_i32_arg( -; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(80) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 @@ -1864,7 +1864,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-NEXT: ret void ; ; MESA-LABEL: @multi_byref_constant_i32_arg( -; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call 
nonnull align 16 dereferenceable(76) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)* ; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 @@ -1892,7 +1892,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) %in.byref) { ; HSA-LABEL: @byref_constant_i32_arg_offset0( -; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0 ; HSA-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)* ; HSA-NEXT: [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4 @@ -1900,7 +1900,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; HSA-NEXT: ret void ; ; MESA-LABEL: @byref_constant_i32_arg_offset0( -; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() ; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36 ; MESA-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)* ; MESA-NEXT: [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4 diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir index a3330a9d522c8..b3bbe731369b6 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir @@ -10,7 +10,7 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr0 = COPY $sgpr1 + ; CHECK-NEXT: $sgpr0 = PRED_COPY $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) @@ -25,7 +25,7 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr0_sgpr1 = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1 = PRED_COPY $sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll index db8904ef71e82..8b9f89b3b031c 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll @@ -74,7 +74,7 @@ entry: ; Check that the select instruction is not deleted. 
; FUNC-LABEL: {{^}}i24_i32_i32_mad: ; EG: CNDE_INT -; SI: s_cselect +; SI: v_cndmask ; GCN2: s_cselect define amdgpu_kernel void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir index f9801a50dfd74..4f46439428b1e 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir @@ -9,9 +9,9 @@ body: | ; GCN-LABEL: name: merge_flat_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -26,11 +26,11 @@ body: | ; GCN-LABEL: name: merge_flat_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -46,13 +46,13 @@ body: | ; GCN-LABEL: name: merge_flat_load_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 2, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY 
[[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -69,14 +69,14 @@ body: | ; GCN-LABEL: name: merge_flat_load_dword_5 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 3, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`) - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[FLAT_LOAD_DWORD]] + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[FLAT_LOAD_DWORD]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -94,16 +94,16 @@ body: | ; GCN-LABEL: name: merge_flat_load_dword_6 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: 
[[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -122,9 +122,9 @@ body: | ; GCN-LABEL: name: merge_flat_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) %2:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) @@ -139,9 +139,9 @@ body: | ; GCN-LABEL: name: merge_flat_load_dwordx3_with_dwordx1 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: 
[[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = FLAT_LOAD_DWORDX3 %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `i128* undef`, align 8) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) @@ -156,9 +156,9 @@ body: | ; GCN-LABEL: name: merge_flat_load_dwordx1_with_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX3_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub1_sub2 - ; GCN-NEXT: S_NOP 0, implicit [[COPY1]], implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub1_sub2 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:vreg_64_align2 = IMPLICIT_DEF %2:vgpr_32 = FLAT_LOAD_DWORD %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 8) diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir index 3a0c973d12456..96d32d7382c50 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-with-global-load-store.mir @@ -9,9 +9,9 @@ body: | ; GCN-LABEL: name: merge_flat_global_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef` + 4, basealign 4) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, addrspace 1) @@ -26,9 +26,9 @@ body: | ; GCN-LABEL: name: merge_global_flat_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = 
PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, basealign 8, addrspace 1) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef` + 4, basealign 8) @@ -43,11 +43,11 @@ body: | ; GCN-LABEL: name: merge_global_flat_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr undef`, align 16) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub0 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) %2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`, basealign 16) @@ -63,13 +63,13 @@ body: | ; GCN-LABEL: name: merge_global_flat_load_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef` + 4, align 4, basealign 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) 
%2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 8, addrspace 1) @@ -86,9 +86,9 @@ body: | ; GCN-LABEL: name: merge_flat_global_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 8) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `double* undef`) %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 8, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) @@ -103,9 +103,9 @@ body: | ; GCN-LABEL: name: merge_flat_global_load_dwordx3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX4_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`) %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 4, 0, implicit $exec :: (load (s96) from `<3 x i32> addrspace(1)* undef`, addrspace 1) @@ -120,9 +120,9 @@ body: | ; GCN-LABEL: name: merge_global_flat_load_dwordx3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX4_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX4_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_96_align2 = PRED_COPY killed [[FLAT_LOAD_DWORDX4_]].sub1_sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) %2:vreg_96_align2 = FLAT_LOAD_DWORDX3 %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `<3 x i32>* undef`) @@ -139,9 +139,9 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s32) from `ptr undef`) ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF1]], [[DEF]].sub0, 4, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:sreg_64_xexec = IMPLICIT_DEF %2:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `float* undef`, basealign 4) @@ -160,9 +160,9 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF1]], [[DEF]].sub0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr undef` + 4, align 4) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:sreg_64_xexec = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %0.sub0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir index 32d7e4afbaf9d..cdbff9d01a16d 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir @@ -9,9 +9,9 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef` + 4, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef` + 4, basealign 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 8, basealign 4, 
addrspace 1) @@ -26,11 +26,11 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_3 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -46,13 +46,13 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_4 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -69,14 +69,14 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_5 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: 
[[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 16, 3, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[GLOBAL_LOAD_DWORD]] + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[GLOBAL_LOAD_DWORD]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -94,16 +94,16 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_6 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 16, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + 
; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -122,9 +122,9 @@ body: | ; GCN-LABEL: name: merge_global_load_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 8, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) @@ -139,9 +139,9 @@ body: | ; GCN-LABEL: name: merge_global_load_dwordx3_with_dwordx1 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 [[DEF]], 12, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 8, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 12, 0, implicit $exec :: (load (s96) from `i128 addrspace(1)* undef`, align 8, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -156,9 +156,9 @@ body: | ; GCN-LABEL: name: merge_global_load_dwordx1_with_dwordx2 ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 12, 0, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub1_sub2 - ; GCN-NEXT: S_NOP 0, implicit [[COPY1]], implicit [[COPY]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub1_sub2 + ; 
GCN-NEXT: S_NOP 0, implicit [[PRED_COPY1]], implicit [[PRED_COPY]] %0:vreg_64_align2 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) %1:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 16, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 8, addrspace 1) @@ -238,9 +238,9 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -257,11 +257,11 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub0 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -279,13 +279,13 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 
- ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -304,16 +304,16 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_96_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[PRED_COPY]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub2 + ; GCN-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]].sub0 + ; GCN-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY2]].sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 20, 3, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + ; GCN-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY4]], implicit [[PRED_COPY5]], implicit [[PRED_COPY3]], implicit [[PRED_COPY1]], implicit [[PRED_COPY6]], implicit [[PRED_COPY7]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) @@ -334,9 +334,9 @@ body: | ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: 
[[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = GLOBAL_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64_align2 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_SADDR]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:sreg_64_xexec = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec :: (load (s64) from `i64 addrspace(1)* undef`, align 4, addrspace 1) @@ -405,9 +405,9 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_2_out_of_order ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY]], implicit [[PRED_COPY1]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, basealign 8, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, align 4, addrspace 1) @@ -422,11 +422,11 @@ body: | ; GCN-LABEL: name: merge_global_load_dword_3_out_of_order ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 [[DEF]], 0, 0, implicit $exec :: (load (s96) from `ptr addrspace(1) undef`, align 16, addrspace 1) - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub0 - ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]] + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_64_align2 = PRED_COPY [[GLOBAL_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY]].sub1 + ; GCN-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY]].sub0 + ; GCN-NEXT: S_NOP 0, implicit [[PRED_COPY2]], implicit [[PRED_COPY3]] %0:vreg_64_align2 = IMPLICIT_DEF %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef` + 4, align 4, addrspace 1) %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32) from `float addrspace(1)* undef`, align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir index 
d13c34463c17f..2f30527660e3c 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir @@ -2,17 +2,17 @@ # GFX10-LABEL: name: image_load_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -20,17 +20,17 @@ body: | --- # GFX10-LABEL: name: image_load_merged_v1v3_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -39,17 +39,17 @@ body: | # GFX10-LABEL: name: image_load_merged_v2v2 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = 
PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -58,17 +58,17 @@ body: | # GFX10-LABEL: name: image_load_merged_v2v2_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -77,17 +77,17 @@ body: | # GFX10-LABEL: name: image_load_merged_v3v1 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -96,17 +96,17 @@ body: | # GFX10-LABEL: name: image_load_merged_v3v1_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: 
image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -119,11 +119,11 @@ body: | name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -141,11 +141,11 @@ body: | name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -160,11 +160,11 @@ body: | name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -178,11 +178,11 @@ body: | name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + 
%2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -196,11 +196,11 @@ body: | name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -215,12 +215,12 @@ body: | name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -234,11 +234,11 @@ body: | name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -252,11 +252,11 @@ body: | name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = 
IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -270,11 +270,11 @@ body: | name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -288,11 +288,11 @@ body: | name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -306,11 +306,11 @@ body: | name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -324,13 +324,13 @@ body: | name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) - %6:vgpr_32 = COPY %5.sub0 + %6:vgpr_32 = PRED_COPY %5.sub0 %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... 
@@ -343,11 +343,11 @@ body: | name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ body: | name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -379,11 +379,11 @@ body: | name: image_load_not_merged_11 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -392,17 +392,17 @@ body: | # GFX10-LABEL: name: image_load_mip_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) 
%7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -413,17 +413,17 @@ body: | # GFX10-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -434,17 +434,17 @@ body: | # GFX10-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -453,17 +453,17 @@ body: | # GFX10-LABEL: name: image_load_pck_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -472,17 +472,17 @@ body: | # GFX10-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir index 8d263a56dcb75..fec03000476dd 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx11.mir @@ -2,17 +2,17 @@ # GFX11-LABEL: name: image_load_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -20,17 +20,17 @@ body: | --- # GFX11-LABEL: name: image_load_merged_v1v3_reversed # GFX11: %{{[0-9]+}}:vreg_128 = 
IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -39,17 +39,17 @@ body: | # GFX11-LABEL: name: image_load_merged_v2v2 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -58,17 +58,17 @@ body: | # GFX11-LABEL: name: image_load_merged_v2v2_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable 
load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -77,17 +77,17 @@ body: | # GFX11-LABEL: name: image_load_merged_v3v1 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -96,17 +96,17 @@ body: | # GFX11-LABEL: name: image_load_merged_v3v1_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -119,11 +119,11 @@ body: | name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) @@ 
-141,11 +141,11 @@ body: | name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) @@ -160,11 +160,11 @@ body: | name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -178,11 +178,11 @@ body: | name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -196,11 +196,11 @@ body: | name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -215,12 +215,12 @@ body: | name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 
%3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -234,11 +234,11 @@ body: | name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -252,11 +252,11 @@ body: | name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -270,11 +270,11 @@ body: | name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -288,11 +288,11 @@ body: | name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) 
%7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -306,11 +306,11 @@ body: | name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -324,13 +324,13 @@ body: | name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = COPY %5.sub0 + %6:vgpr_32 = PRED_COPY %5.sub0 %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx11 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) ... 
@@ -343,11 +343,11 @@ body: | name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -361,11 +361,11 @@ body: | name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -379,11 +379,11 @@ body: | name: image_load_not_merged_11 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx11 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx11 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -392,17 +392,17 @@ body: | # GFX11-LABEL: name: image_load_mip_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx11 
%5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -413,17 +413,17 @@ body: | # GFX11-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx11 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -434,17 +434,17 @@ body: | # GFX11-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx11 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx11 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -453,17 +453,17 @@ body: | # GFX11-LABEL: name: image_load_pck_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 
208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -472,17 +472,17 @@ body: | # GFX11-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx11 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx11 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx11 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir index 2e76dddc40729..664ad8c3d1b83 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir @@ -2,17 +2,17 @@ # GFX9-LABEL: name: image_load_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -21,17 +21,17 @@ body: | # GFX9-LABEL: name: image_load_merged_v1v3_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: 
%{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_load_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -40,17 +40,17 @@ body: | # GFX9-LABEL: name: image_load_merged_v2v2 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_load_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -59,17 +59,17 @@ body: | # GFX9-LABEL: name: image_load_merged_v2v2_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_load_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ 
-78,17 +78,17 @@ body: | # GFX9-LABEL: name: image_load_merged_v3v1 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_load_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -97,17 +97,17 @@ body: | # GFX9-LABEL: name: image_load_merged_v3v1_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_load_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -120,11 +120,11 @@ body: | name: image_load_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -142,11 +142,11 @@ body: | name: image_load_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -161,11 +161,11 @@ body: | name: image_load_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -179,11 +179,11 @@ body: | name: image_load_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -197,11 +197,11 @@ body: | name: image_load_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -216,12 +216,12 @@ body: | name: image_load_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -235,11 +235,11 @@ body: | name: image_load_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -253,11 +253,11 @@ body: | name: image_load_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -271,11 +271,11 @@ body: | name: image_load_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -289,11 +289,11 @@ body: | name: image_load_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -307,11 +307,11 @@ body: | name: image_load_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 
= PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -325,11 +325,11 @@ body: | name: image_load_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -343,11 +343,11 @@ body: | name: image_load_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ body: | name: image_load_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -374,17 +374,17 @@ body: | # GFX9-LABEL: name: image_load_mip_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_merged_v1v3 body: | bb.0.entry: - 
%0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -393,17 +393,17 @@ body: | # GFX9-LABEL: name: image_load_mip_pck_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -412,17 +412,17 @@ body: | # GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_mip_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -431,17 +431,17 @@ body: | # GFX9-LABEL: name: image_load_pck_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: 
(dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -450,17 +450,17 @@ body: | # GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_load_pck_sgn_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir index 3602436d3b518..7acf630815aa7 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir @@ -2,17 +2,17 @@ # GFX10-LABEL: name: image_sample_l_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -20,17 +20,17 @@ body: | --- # GFX10-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -39,17 +39,17 @@ body: | # GFX10-LABEL: name: image_sample_l_merged_v2v2 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -58,17 +58,17 @@ body: | # GFX10-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit 
$exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -77,17 +77,17 @@ body: | # GFX10-LABEL: name: image_sample_l_merged_v3v1 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -96,17 +96,17 @@ body: | # GFX10-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 
0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -119,11 +119,11 @@ body: | name: image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -141,11 +141,11 @@ body: | name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -160,11 +160,11 @@ body: | name: image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -178,11 +178,11 @@ body: | name: image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = 
PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -196,11 +196,11 @@ body: | name: image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -215,12 +215,12 @@ body: | name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -234,12 +234,12 @@ body: | name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -253,11 +253,11 @@ body: | name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + 
%4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -271,11 +271,11 @@ body: | name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -289,11 +289,11 @@ body: | name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -307,11 +307,11 @@ body: | name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -325,11 +325,11 @@ body: | name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = 
IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -343,11 +343,11 @@ body: | name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -361,11 +361,11 @@ body: | name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -379,11 +379,11 @@ body: | name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -395,17 +395,17 @@ body: | # GFX10-LABEL: name: image_sample_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = 
PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -415,17 +415,17 @@ body: | # GFX10-LABEL: name: image_sample_b_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -435,17 +435,17 @@ body: | # GFX10-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -455,17 +455,17 @@ body: | # GFX10-LABEL: name: 
image_sample_b_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -475,17 +475,17 @@ body: | # GFX10-LABEL: name: image_sample_b_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -495,17 +495,17 @@ body: | # GFX10-LABEL: name: image_sample_c_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - 
%2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -515,17 +515,17 @@ body: | # GFX10-LABEL: name: image_sample_cd_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -535,17 +535,17 @@ body: | # GFX10-LABEL: name: image_sample_cd_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 
%5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -555,17 +555,17 @@ body: | # GFX10-LABEL: name: image_sample_cd_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -575,17 +575,17 @@ body: | # GFX10-LABEL: name: image_sample_cd_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -595,17 +595,17 @@ body: | # GFX10-LABEL: name: image_sample_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: 
(dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -615,17 +615,17 @@ body: | # GFX10-LABEL: name: image_sample_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -635,17 +635,17 @@ body: | # GFX10-LABEL: name: image_sample_c_b_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -655,17 +655,17 @@ body: | # GFX10-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -675,17 +675,17 @@ body: | # GFX10-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ 
-695,17 +695,17 @@ body: | # GFX10-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -715,17 +715,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cd_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -735,17 +735,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cd_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_sample_c_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -755,17 +755,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -775,17 +775,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cd_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = 
PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -795,17 +795,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -815,17 +815,17 @@ body: | # GFX10-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: 
(dereferenceable load (s96), align 16, addrspace 4) @@ -835,17 +835,17 @@ body: | # GFX10-LABEL: name: image_sample_c_d_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -855,17 +855,17 @@ body: | # GFX10-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -875,17 +875,17 @@ body: | # GFX10-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed 
%8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -895,17 +895,17 @@ body: | # GFX10-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -915,17 +915,17 @@ body: | # GFX10-LABEL: name: image_sample_c_l_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -935,17 +935,17 @@ body: | # GFX10-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -955,17 +955,17 @@ body: | # GFX10-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, 
addrspace 4) @@ -975,17 +975,17 @@ body: | # GFX10-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -995,17 +995,17 @@ body: | # GFX10-LABEL: name: image_sample_c_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1015,17 +1015,17 @@ body: | # GFX10-LABEL: name: image_sample_d_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = 
PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1035,17 +1035,17 @@ body: | # GFX10-LABEL: name: image_sample_d_cl_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1055,17 +1055,17 @@ body: | # GFX10-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, 
%5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1075,17 +1075,17 @@ body: | # GFX10-LABEL: name: image_sample_d_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1095,17 +1095,17 @@ body: | # GFX10-LABEL: name: image_sample_lz_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1115,17 +1115,17 @@ body: | # GFX10-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit 
$exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1135,17 +1135,17 @@ body: | # GFX10-LABEL: name: image_sample_l_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1155,17 +1155,17 @@ body: | # GFX10-LABEL: name: image_sample_o_merged_v1v3 # GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 
= BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir index 4d8e6f61628b0..906a9dffe07c0 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx11.mir @@ -2,17 +2,17 @@ # GFX11-LABEL: name: image_sample_l_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -20,17 +20,17 @@ body: | --- # GFX11-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable 
load 12, align 16, addrspace 4) @@ -39,17 +39,17 @@ body: | # GFX11-LABEL: name: image_sample_l_merged_v2v2 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -58,17 +58,17 @@ body: | # GFX11-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) @@ -77,17 +77,17 @@ body: | # GFX11-LABEL: name: image_sample_l_merged_v3v1 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + 
%2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -96,17 +96,17 @@ body: | # GFX11-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -119,11 +119,11 @@ body: | name: image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) @@ -141,11 +141,11 @@ body: | name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 
%2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) IMAGE_STORE_V4_V2_nsa_gfx11 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) @@ -160,11 +160,11 @@ body: | name: image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -178,11 +178,11 @@ body: | name: image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -196,11 +196,11 @@ body: | name: image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) @@ -215,12 +215,12 @@ body: | name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 
0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -234,12 +234,12 @@ body: | name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -253,11 +253,11 @@ body: | name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -271,11 +271,11 @@ body: | name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -289,11 +289,11 @@ body: | name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 
1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -307,11 +307,11 @@ body: | name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -325,11 +325,11 @@ body: | name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx11 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -343,11 +343,11 @@ body: | name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -361,11 +361,11 @@ body: | name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -379,11 +379,11 @@ body: | name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY 
$sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx11 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx11 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -395,17 +395,17 @@ body: | # GFX11-LABEL: name: image_sample_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx11 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -415,17 +415,17 @@ body: | # GFX11-LABEL: name: image_sample_b_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -435,17 +435,17 @@ body: | # GFX11-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = 
IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -455,17 +455,17 @@ body: | # GFX11-LABEL: name: image_sample_b_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -475,17 +475,17 @@ body: | # GFX11-LABEL: name: image_sample_b_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = 
PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -495,17 +495,17 @@ body: | # GFX11-LABEL: name: image_sample_c_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -515,17 +515,17 @@ body: | # GFX11-LABEL: name: image_sample_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -535,17 +535,17 @@ body: | # GFX11-LABEL: name: 
image_sample_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -555,17 +555,17 @@ body: | # GFX11-LABEL: name: image_sample_c_b_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -575,17 +575,17 @@ body: | # GFX11-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -595,17 +595,17 @@ body: | # GFX11-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_192 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -615,17 +615,17 @@ body: | # GFX11-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -635,17 +635,17 @@ body: | # GFX11-LABEL: name: 
image_sample_c_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -655,17 +655,17 @@ body: | # GFX11-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -675,17 +675,17 @@ body: | # GFX11-LABEL: name: image_sample_c_d_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY 
$sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -695,17 +695,17 @@ body: | # GFX11-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -715,17 +715,17 @@ body: | # GFX11-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V16_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_512 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V16_gfx11 %5:vreg_512, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -735,17 +735,17 @@ body: | # GFX11-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = 
PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -755,17 +755,17 @@ body: | # GFX11-LABEL: name: image_sample_c_l_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -775,17 +775,17 @@ body: | # GFX11-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 
0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -795,17 +795,17 @@ body: | # GFX11-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -815,17 +815,17 @@ body: | # GFX11-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx11 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -835,17 +835,17 @@ body: | # GFX11-LABEL: name: image_sample_c_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | 
bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -855,17 +855,17 @@ body: | # GFX11-LABEL: name: image_sample_d_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_192 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_gfx11 %5:vreg_192, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -875,17 +875,17 @@ body: | # GFX11-LABEL: name: image_sample_d_cl_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -895,17 +895,17 @@ body: | # GFX11-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, 
implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_256 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_gfx11 %5:vreg_256, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -915,17 +915,17 @@ body: | # GFX11-LABEL: name: image_sample_d_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_gfx11 %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_224 = IMPLICIT_DEF %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_gfx11 %5:vreg_224, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -935,17 +935,17 @@ body: | # GFX11-LABEL: name: image_sample_lz_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx11 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx11 %5:vgpr_32, 
%5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -955,17 +955,17 @@ body: | # GFX11-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -975,17 +975,17 @@ body: | # GFX11-LABEL: name: image_sample_l_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx11 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) @@ -995,17 +995,17 @@ body: | # GFX11-LABEL: name: image_sample_o_merged_v1v3 # GFX11: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx11 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY 
$sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx11 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir index 0b51a39d989b0..b7ddf0df1e087 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir @@ -2,17 +2,17 @@ # GFX9-LABEL: name: image_sample_l_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -21,17 +21,17 @@ body: | # GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub0_sub1_sub2 name: image_sample_l_merged_v1v3_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = 
IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -40,17 +40,17 @@ body: | # GFX9-LABEL: name: image_sample_l_merged_v2v2 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub2_sub3 name: image_sample_l_merged_v2v2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -59,17 +59,17 @@ body: | # GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %8.sub0_sub1 name: image_sample_l_merged_v2v2_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) @@ -78,17 +78,17 @@ body: | # GFX9-LABEL: name: image_sample_l_merged_v3v1 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub3 name: image_sample_l_merged_v3v1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = 
S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -97,17 +97,17 @@ body: | # GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %8.sub0 name: image_sample_l_merged_v3v1_reversed body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -120,11 +120,11 @@ body: | name: image_sample_l_divided_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) @@ -142,11 +142,11 @@ body: | name: image_sample_l_divided_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vreg_128 = COPY %2 + %4:vreg_128 = PRED_COPY %2 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) @@ -161,11 +161,11 @@ body: | name: 
image_sample_l_dmask_overlapped_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -179,11 +179,11 @@ body: | name: image_sample_l_dmask_not_disjoint_not_merged body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -197,11 +197,11 @@ body: | name: image_sample_l_not_merged_0 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) @@ -216,12 +216,12 @@ body: | name: image_sample_l_not_merged_1 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -235,12 +235,12 @@ body: | name: image_sample_l_not_merged_2 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 
36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 - %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = PRED_COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %5:vgpr_32 = COPY %2.sub3 + %5:vgpr_32 = PRED_COPY %2.sub3 %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -254,11 +254,11 @@ body: | name: image_sample_l_not_merged_3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -272,11 +272,11 @@ body: | name: image_sample_l_not_merged_4 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -290,11 +290,11 @@ body: | name: image_sample_l_not_merged_5 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -308,11 +308,11 @@ body: | name: image_sample_l_not_merged_6 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -326,11 +326,11 @@ body: | name: image_sample_l_not_merged_7 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -344,11 +344,11 @@ body: | name: image_sample_l_not_merged_8 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -362,11 +362,11 @@ body: | name: image_sample_l_not_merged_9 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -380,11 +380,11 @@ body: | name: image_sample_l_not_merged_10 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable 
load (s96), align 16, addrspace 4) @@ -393,17 +393,17 @@ body: | # GFX9-LABEL: name: image_sample_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -412,17 +412,17 @@ body: | # GFX9-LABEL: name: image_sample_b_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -431,17 +431,17 @@ body: | # GFX9-LABEL: name: image_sample_b_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET 
%2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -450,17 +450,17 @@ body: | # GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -469,17 +469,17 @@ body: | # GFX9-LABEL: name: image_sample_b_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -488,17 +488,17 @@ body: | # GFX9-LABEL: name: image_sample_c_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_sample_c_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -507,17 +507,17 @@ body: | # GFX9-LABEL: name: image_sample_cd_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -526,17 +526,17 @@ body: | # GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -545,17 +545,17 @@ body: | # GFX9-LABEL: name: 
image_sample_cd_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -564,17 +564,17 @@ body: | # GFX9-LABEL: name: image_sample_cd_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -583,17 +583,17 @@ body: | # GFX9-LABEL: name: image_sample_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec 
:: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -602,17 +602,17 @@ body: | # GFX9-LABEL: name: image_sample_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -621,17 +621,17 @@ body: | # GFX9-LABEL: name: image_sample_c_b_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -640,17 +640,17 @@ body: | # GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_merged_v1v3 body: | 
bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -659,17 +659,17 @@ body: | # GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -678,17 +678,17 @@ body: | # GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_b_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -697,17 +697,17 @@ body: | # GFX9-LABEL: name: 
image_sample_c_cd_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -716,17 +716,17 @@ body: | # GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -735,17 +735,17 @@ body: | # GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 
0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -754,17 +754,17 @@ body: | # GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cd_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -773,17 +773,17 @@ body: | # GFX9-LABEL: name: image_sample_c_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -792,17 +792,17 @@ body: | # GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_sample_c_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -811,17 +811,17 @@ body: | # GFX9-LABEL: name: image_sample_c_d_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -830,17 +830,17 @@ body: | # GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -849,17 +849,17 @@ body: | # GFX9-LABEL: name: 
image_sample_c_d_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -868,17 +868,17 @@ body: | # GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -887,17 +887,17 @@ body: | # GFX9-LABEL: name: image_sample_c_l_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_l_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -906,17 +906,17 @@ body: | # GFX9-LABEL: name: image_sample_c_lz_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -925,17 +925,17 @@ body: | # GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -944,17 +944,17 @@ body: | # GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: 
image_sample_c_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -963,17 +963,17 @@ body: | # GFX9-LABEL: name: image_sample_c_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_c_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -982,17 +982,17 @@ body: | # GFX9-LABEL: name: image_sample_d_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1001,17 +1001,17 @@ body: | # GFX9-LABEL: name: image_sample_d_cl_merged_v1v3 # 
GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1020,17 +1020,17 @@ body: | # GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_cl_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1039,17 +1039,17 @@ body: | # GFX9-LABEL: name: image_sample_d_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_d_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant 
load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1058,17 +1058,17 @@ body: | # GFX9-LABEL: name: image_sample_lz_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1077,17 +1077,17 @@ body: | # GFX9-LABEL: name: image_sample_lz_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_lz_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1096,17 +1096,17 @@ body: | # GFX9-LABEL: name: image_sample_l_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_l_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + 
%0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) @@ -1115,17 +1115,17 @@ body: | # GFX9-LABEL: name: image_sample_o_merged_v1v3 # GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %8.sub1_sub2_sub3 name: image_sample_o_merged_v1v3 body: | bb.0.entry: - %0:sgpr_64 = COPY $sgpr0_sgpr1 + %0:sgpr_64 = PRED_COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 - %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %2:sgpr_128 = PRED_COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 - %4:vgpr_32 = COPY %2.sub3 + %4:vgpr_32 = PRED_COPY %2.sub3 %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir index 1a44f37155dad..1ba6f1e288203 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir @@ -8,15 +8,15 @@ # GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx9_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -25,15 +25,15 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x # GFX9: %{{[0-9]+}}:vreg_128 = 
TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx9_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -42,15 +42,15 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx9_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -59,15 +59,15 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_x_xy # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx9_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -76,15 +76,15 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_xy_x # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), 
align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx9_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -94,16 +94,16 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_x_x # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx9_tbuffer_load_x_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -112,16 +112,16 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx9_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -131,28 +131,28 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_float_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 
+# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -168,28 +168,28 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_sint_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), 
align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -205,28 +205,28 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_load_uint_32 # GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx9_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET 
%5:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -241,10 +241,10 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -260,10 +260,10 @@ body: | name: gfx9_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -278,10 +278,10 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -295,10 +295,10 @@ body: | name: gfx9_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -320,14 +320,14 @@ name: gfx9_tbuffer_store_x_xyz body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = 
COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -345,14 +345,14 @@ name: gfx9_tbuffer_store_xyz_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) @@ -369,14 +369,14 @@ name: gfx9_tbuffer_store_xy_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -393,14 +393,14 @@ name: gfx9_tbuffer_store_x_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, 
%5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -417,14 +417,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -440,14 +440,14 @@ name: gfx9_tbuffer_store_x_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -461,14 +461,14 @@ name: gfx9_tbuffer_store_x_x_format_32_32_32_32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -476,19 +476,19 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_store_float32 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: 
%{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -503,19 +503,19 @@ name: gfx9_tbuffer_store_float32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -530,19 +530,19 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_store_sint32 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = 
PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -557,19 +557,19 @@ name: gfx9_tbuffer_store_sint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -584,19 +584,19 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_store_uint32 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, 
%subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -611,19 +611,19 @@ name: gfx9_tbuffer_store_uint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -638,19 +638,19 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_data_format_mismatch -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -665,19 +665,19 @@ name: gfx9_tbuffer_store_not_merged_data_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -692,19 +692,19 @@ body: | --- # GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_num_format_mismatch -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -719,19 +719,19 @@ name: gfx9_tbuffer_store_not_merged_num_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = 
PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -752,10 +752,10 @@ body: | name: gfx9_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -769,10 +769,10 @@ body: | name: gfx9_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -786,10 +786,10 @@ body: | name: gfx9_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -805,11 +805,11 @@ body: | name: gfx9_tbuffer_load_merge_across_swizzled_store body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 - %4:vgpr_32 = COPY $vgpr0 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 + %4:vgpr_32 = PRED_COPY $vgpr0 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, 
%subreg.sub2, %3:sgpr_32, %subreg.sub3 %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %5:sgpr_128, 0, 6, 116, 0, 0, 1, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -824,15 +824,15 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx10_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -841,15 +841,15 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx10_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) @@ -858,15 +858,15 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx10_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = 
TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -875,15 +875,15 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_x_xy # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx10_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) @@ -892,15 +892,15 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_xy_x # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx10_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -910,16 +910,16 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_x_x # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx10_tbuffer_load_x_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 
4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -928,16 +928,16 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx10_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -947,28 +947,28 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_float_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, 
%3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -984,28 +984,28 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_sint_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1021,28 +1021,28 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_load_uint_32 # GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed 
%17.sub3 -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx10_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1057,10 +1057,10 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1076,10 +1076,10 @@ body: | name: gfx10_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1094,10 +1094,10 @@ body: | --- # GFX10-LABEL: 
name: gfx10_tbuffer_load_not_merged_num_format_mismatch -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1111,10 +1111,10 @@ body: | name: gfx10_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1138,14 +1138,14 @@ name: gfx10_tbuffer_store_x_xyz body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1162,14 +1162,14 @@ name: gfx10_tbuffer_store_xyz_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, 
implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) @@ -1186,14 +1186,14 @@ name: gfx10_tbuffer_store_xy_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -1210,14 +1210,14 @@ name: gfx10_tbuffer_store_x_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1234,14 +1234,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1257,14 +1257,14 @@ name: gfx10_tbuffer_store_x_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = 
PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1278,14 +1278,14 @@ name: gfx10_tbuffer_store_x_x_format_32_32_32_32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1293,19 +1293,19 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_store_float32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1320,19 +1320,19 @@ name: gfx10_tbuffer_store_float32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - 
%6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1347,19 +1347,19 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_store_sint32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1374,19 +1374,19 @@ name: gfx10_tbuffer_store_sint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + 
%2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1401,19 +1401,19 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_store_uint32 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -1428,19 +1428,19 @@ name: gfx10_tbuffer_store_uint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, 
addrspace 4) @@ -1455,19 +1455,19 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_data_format_mismatch -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1482,19 +1482,19 @@ name: gfx10_tbuffer_store_not_merged_data_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1509,19 +1509,19 @@ body: | --- # GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_num_format_mismatch -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX10: 
%{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1536,19 +1536,19 @@ name: gfx10_tbuffer_store_not_merged_num_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -1569,10 +1569,10 @@ body: | name: gfx10_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1586,10 +1586,10 @@ body: | name: 
gfx10_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1603,10 +1603,10 @@ body: | name: gfx10_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1621,15 +1621,15 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY killed %7.sub1_sub2_sub3 name: gfx11_tbuffer_load_x_xyz body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 60, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) @@ -1638,15 +1638,15 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_xyz_x # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %7.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub3 name: gfx11_tbuffer_load_xyz_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 
0, 4, 60, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) @@ -1655,15 +1655,15 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub2_sub3 name: gfx11_tbuffer_load_xy_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 50, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) @@ -1672,15 +1672,15 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_x_xy # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 60, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY killed %7.sub1_sub2 name: gfx11_tbuffer_load_x_xy body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 50, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) @@ -1689,15 +1689,15 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_xy_x # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 60, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %7.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub2 name: gfx11_tbuffer_load_xy_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) %8:vgpr_32 = 
TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1707,16 +1707,16 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_x_x # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx11_tbuffer_load_x_x body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1725,16 +1725,16 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_x_x_format_32_32_32_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %7.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %7.sub1 name: gfx11_tbuffer_load_x_x_format_32_32_32_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 63, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1744,28 +1744,28 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_float_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed 
%16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 60, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_float_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1781,28 +1781,28 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_sint_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 49, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 62, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 59, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_sint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE 
%0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1818,28 +1818,28 @@ body: | # GFX11-LABEL: name: gfx11_tbuffer_load_uint_32 # GFX11: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %14.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %14.sub1 # GFX11: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 61, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX11: %{{[0-9]+}}:vreg_96 = PRED_COPY %17.sub0_sub1_sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %17.sub3 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %16.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %16.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %15.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %15.sub1 # GFX11: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 58, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) -# GFX11: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY killed %18.sub1 +# GFX11: %{{[0-9]+}}:vreg_64 = PRED_COPY %19.sub0_sub1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %19.sub2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY %18.sub0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY killed %18.sub1 name: gfx11_tbuffer_load_uint_32 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1854,10 +1854,10 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_data_format_mismatch -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX11: %{{[0-9]+}}:vgpr_32 = 
TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1873,10 +1873,10 @@ body: | name: gfx11_tbuffer_load_not_merged_data_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1891,10 +1891,10 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_num_format_mismatch -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) # GFX11: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) @@ -1908,10 +1908,10 @@ body: | name: gfx11_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -1935,14 +1935,14 @@ name: gfx11_tbuffer_store_x_xyz body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 
TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -1959,14 +1959,14 @@ name: gfx11_tbuffer_store_xyz_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 60, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) @@ -1983,14 +1983,14 @@ name: gfx11_tbuffer_store_xy_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 @@ -2007,14 +2007,14 @@ name: gfx11_tbuffer_store_x_xy body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2031,14 +2031,14 @@ body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = 
PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) @@ -2054,14 +2054,14 @@ name: gfx11_tbuffer_store_x_x body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2075,14 +2075,14 @@ name: gfx11_tbuffer_store_x_x_format_32_32_32_32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 63, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2090,19 +2090,19 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_store_float32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY 
$vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 50, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2117,19 +2117,19 @@ name: gfx11_tbuffer_store_float32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2144,19 +2144,19 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_store_sint32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, 
%subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 49, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2171,19 +2171,19 @@ name: gfx11_tbuffer_store_sint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2198,19 +2198,19 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_store_uint32 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 # GFX11: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 48, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) @@ -2225,19 +2225,19 @@ name: gfx11_tbuffer_store_uint32 body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = 
COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2252,19 +2252,19 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_store_not_merged_data_format_mismatch -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -2279,19 +2279,19 @@ name: gfx11_tbuffer_store_not_merged_data_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = 
PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2306,19 +2306,19 @@ body: | --- # GFX11-LABEL: name: gfx11_tbuffer_store_not_merged_num_format_mismatch -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 -# GFX11: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 -# GFX11: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr8 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr7 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr6 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr5 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr4 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr3 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr2 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr1 +# GFX11: %{{[0-9]+}}:vgpr_32 = PRED_COPY $vgpr0 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr3 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr2 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr1 +# GFX11: %{{[0-9]+}}:sgpr_32 = PRED_COPY $sgpr0 # GFX11: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) # GFX11: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) @@ -2333,19 +2333,19 @@ name: gfx11_tbuffer_store_not_merged_num_format_mismatch body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - %12:vgpr_32 = COPY $vgpr8 - %11:vgpr_32 = COPY $vgpr7 - %10:vgpr_32 = COPY $vgpr6 - %9:vgpr_32 = COPY $vgpr5 - %8:vgpr_32 = COPY $vgpr4 - %7:vgpr_32 = COPY $vgpr3 - %6:vgpr_32 = COPY $vgpr2 - %5:vgpr_32 = COPY $vgpr1 - %4:vgpr_32 = COPY $vgpr0 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %12:vgpr_32 = PRED_COPY $vgpr8 + %11:vgpr_32 = PRED_COPY $vgpr7 + %10:vgpr_32 = PRED_COPY $vgpr6 + %9:vgpr_32 = PRED_COPY $vgpr5 + %8:vgpr_32 = PRED_COPY $vgpr4 + %7:vgpr_32 = PRED_COPY $vgpr3 + %6:vgpr_32 = PRED_COPY $vgpr2 + %5:vgpr_32 = PRED_COPY $vgpr1 + %4:vgpr_32 = PRED_COPY $vgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 
implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) @@ -2366,10 +2366,10 @@ body: | name: gfx11_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -2383,10 +2383,10 @@ body: | name: gfx11_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) @@ -2400,10 +2400,10 @@ body: | name: gfx11_tbuffer_load_merge_across_swizzle body: | bb.0.entry: - %0:sgpr_32 = COPY $sgpr0 - %1:sgpr_32 = COPY $sgpr1 - %2:sgpr_32 = COPY $sgpr2 - %3:sgpr_32 = COPY $sgpr3 + %0:sgpr_32 = PRED_COPY $sgpr0 + %1:sgpr_32 = PRED_COPY $sgpr1 + %2:sgpr_32 = PRED_COPY $sgpr2 + %3:sgpr_32 = PRED_COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll index 28facb38cff57..2575fd4f3a33b 100644 --- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll +++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll @@ -28,20 +28,21 @@ store i32 0, i32 addrspace(3)* @used_by_kernel define amdgpu_kernel void @withcall() { ; GFX9-LABEL: withcall: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s10, -1 -; GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; GFX9-NEXT: s_add_u32 s8, s8, s3 -; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: s_getpc_b64 s[2:3] -; GFX9-NEXT: s_add_u32 s2, s2, nonkernel@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s3, s3, nonkernel@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s14, -1 +; GFX9-NEXT: s_mov_b32 s15, 
0xe00000 +; GFX9-NEXT: s_add_u32 s12, s12, s3 +; GFX9-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-NEXT: s_add_u32 s8, s0, 36 +; GFX9-NEXT: s_addc_u32 s9, s1, 0 +; GFX9-NEXT: s_getpc_b64 s[0:1] +; GFX9-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[12:13] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-NEXT: s_mov_b64 s[2:3], s[14:15] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: ds_write_b32 v0, v0 offset:8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -50,20 +51,21 @@ define amdgpu_kernel void @withcall() { ; ; GFX10-LABEL: withcall: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s10, -1 -; GFX10-NEXT: s_mov_b32 s11, 0x31c16000 -; GFX10-NEXT: s_add_u32 s8, s8, s3 -; GFX10-NEXT: s_addc_u32 s9, s9, 0 -; GFX10-NEXT: s_getpc_b64 s[2:3] -; GFX10-NEXT: s_add_u32 s2, s2, nonkernel@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s3, s3, nonkernel@gotpcrel32@hi+12 +; GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s14, -1 +; GFX10-NEXT: s_mov_b32 s15, 0x31c16000 +; GFX10-NEXT: s_add_u32 s12, s12, s3 +; GFX10-NEXT: s_addc_u32 s13, s13, 0 +; GFX10-NEXT: s_add_u32 s8, s0, 36 +; GFX10-NEXT: s_addc_u32 s9, s1, 0 +; GFX10-NEXT: s_getpc_b64 s[0:1] +; GFX10-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 -; GFX10-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX10-NEXT: s_mov_b64 s[0:1], s[8:9] -; GFX10-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX10-NEXT: s_mov_b64 s[0:1], s[12:13] +; GFX10-NEXT: s_mov_b64 s[2:3], s[14:15] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: ds_write_b32 v0, v0 offset:8 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -72,21 +74,22 @@ define amdgpu_kernel void @withcall() { ; ; G_GFX9-LABEL: withcall: ; G_GFX9: ; %bb.0: -; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_mov_b32 s10, -1 -; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_add_u32 s8, s8, s3 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] +; G_GFX9-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; G_GFX9-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; G_GFX9-NEXT: s_mov_b32 s14, -1 +; G_GFX9-NEXT: s_mov_b32 s15, 0xe00000 +; G_GFX9-NEXT: s_add_u32 s12, s12, s3 +; G_GFX9-NEXT: s_addc_u32 s13, s13, 0 +; G_GFX9-NEXT: s_add_u32 s8, s0, 36 +; G_GFX9-NEXT: s_addc_u32 s9, s1, 0 ; G_GFX9-NEXT: s_getpc_b64 s[0:1] ; G_GFX9-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4 ; G_GFX9-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12 ; G_GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; G_GFX9-NEXT: s_mov_b64 s[0:1], s[8:9] +; G_GFX9-NEXT: s_mov_b64 s[0:1], s[12:13] ; G_GFX9-NEXT: v_mov_b32_e32 v0, 0 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 8 -; G_GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] +; G_GFX9-NEXT: s_mov_b64 s[2:3], s[14:15] ; G_GFX9-NEXT: s_mov_b32 s32, 0 ; G_GFX9-NEXT: ds_write_b32 v1, v0 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -95,21 +98,22 @@ define amdgpu_kernel void @withcall() { ; ; G_GFX10-LABEL: withcall: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX10-NEXT: s_mov_b32 
s10, -1 -; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 -; G_GFX10-NEXT: s_add_u32 s8, s8, s3 -; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX10-NEXT: s_mov_b64 s[6:7], s[0:1] +; G_GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; G_GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; G_GFX10-NEXT: s_mov_b32 s14, -1 +; G_GFX10-NEXT: s_mov_b32 s15, 0x31c16000 +; G_GFX10-NEXT: s_add_u32 s12, s12, s3 +; G_GFX10-NEXT: s_addc_u32 s13, s13, 0 +; G_GFX10-NEXT: s_add_u32 s8, s0, 36 +; G_GFX10-NEXT: s_addc_u32 s9, s1, 0 ; G_GFX10-NEXT: s_getpc_b64 s[0:1] ; G_GFX10-NEXT: s_add_u32 s0, s0, nonkernel@gotpcrel32@lo+4 ; G_GFX10-NEXT: s_addc_u32 s1, s1, nonkernel@gotpcrel32@hi+12 ; G_GFX10-NEXT: v_mov_b32_e32 v0, 0 ; G_GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, 8 -; G_GFX10-NEXT: s_mov_b64 s[0:1], s[8:9] -; G_GFX10-NEXT: s_mov_b64 s[2:3], s[10:11] +; G_GFX10-NEXT: s_mov_b64 s[0:1], s[12:13] +; G_GFX10-NEXT: s_mov_b64 s[2:3], s[14:15] ; G_GFX10-NEXT: s_mov_b32 s32, 0 ; G_GFX10-NEXT: ds_write_b32 v1, v0 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir index 14a5f17f7deff..219fc0b91f07c 100644 --- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir @@ -17,13 +17,13 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -65,13 +65,13 @@ body: | ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + 
; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -113,13 +113,13 @@ body: | ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -157,13 +157,13 @@ body: | ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: GLOBAL_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; 
GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -205,13 +205,13 @@ body: | ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -252,13 +252,13 @@ body: | ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 ; GCN-NEXT: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE1]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE1]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -297,13 +297,13 @@ body: | ; GCN-NEXT: 
[[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -342,13 +342,13 @@ body: | ; GCN-NEXT: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 - ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec - ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub0 + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY [[PHI]].sub1 + ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY]], 1, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PRED_COPY1]], 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_NE_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U64_e64 [[REG_SEQUENCE]], 0, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN-NEXT: [[PRED_COPY2:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U64_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -388,7 +388,7 @@ body: | ; GCN-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_AND_B32_e64_]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; GCN-NEXT: 
S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -428,7 +428,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_AND_B32_e64_]], 0, implicit $exec - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[V_AND_B32_e64_]], implicit $exec ; GCN-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index b8bb3a5a242a1..fd2c4ef3d20f0 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W32 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64 @@ -7,122 +8,380 @@ ; Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions. -; W64-LABEL: mubuf_vgpr -; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; W64: [[LOOPBB:.LBB[0-9]+_[0-9]+]]: -; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] -; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W64: s_xor_b64 exec, exec, [[AND]] -; W64: s_cbranch_execnz [[LOOPBB]] -; W64: s_mov_b64 exec, [[SAVEEXEC]] -; W64: v_mov_b32_e32 v0, [[RES]] - -; W32-LABEL: mubuf_vgpr -; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo -; W32: [[LOOPBB:.LBB[0-9]+_[0-9]+]]: -; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]] -; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]] -; W32: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]] -; W32: s_cbranch_execnz [[LOOPBB]] -; W32: s_mov_b32 exec_lo, [[SAVEEXEC]] -; W32: v_mov_b32_e32 v0, [[RES]] - define float @mubuf_vgpr(<4 x i32> %i, i32 
%c) #0 { +; W64-O0-LABEL: mubuf_vgpr: +; W64-O0: ; %bb.0: +; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: ; implicit-def: $vgpr5 +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v4, v3 +; W64-O0-NEXT: v_mov_b32_e32 v5, v2 +; W64-O0-NEXT: v_mov_b32_e32 v6, v1 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v6 +; W64-O0-NEXT: v_mov_b32_e32 v2, v5 +; W64-O0-NEXT: v_mov_b32_e32 v3, v4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; W64-O0-NEXT: s_mov_b32 s4, 0 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 +; W64-O0-NEXT: s_mov_b32 s4, s8 +; W64-O0-NEXT: s_mov_b32 s5, s12 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] +; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 +; W64-O0-NEXT: s_mov_b32 s9, s12 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; 
W64-O0-NEXT: v_writelane_b32 v0, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v0, s11, 6 +; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] +; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 +; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 3 +; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execnz .LBB0_1 +; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 2 +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: ; kill: killed $vgpr1 +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_setpc_b64 s[30:31] %call = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1 ret float %call } -; W64-LABEL: mubuf_vgpr_adjacent_in_block - -; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; W64: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: -; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] -; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W64: s_xor_b64 exec, exec, [[SAVE]] -; W64: s_cbranch_execnz [[LOOPBB0]] - -; W64: s_mov_b64 exec, [[SAVEEXEC]] -; FIXME: redundant s_mov -; W64: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec - -; W64: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: -; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W64-DAG: 
v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] -; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64: buffer_load_format_x [[RES1:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W64: s_xor_b64 exec, exec, [[SAVE]] -; W64: s_cbranch_execnz [[LOOPBB1]] - -; W64: s_mov_b64 exec, [[SAVEEXEC]] -; W64-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off -; W64-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off - - -; W32-LABEL: mubuf_vgpr_adjacent_in_block - -; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo -; W32: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: -; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]] -; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]] -; W32: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]] -; W32: s_cbranch_execnz [[LOOPBB0]] - -; W32: s_mov_b32 exec_lo, [[SAVEEXEC]] -; FIXME: redundant s_mov -; W32: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo - -; W32: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: -; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]] -; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]] -; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]] -; W32: s_cbranch_execnz [[LOOPBB1]] - -; W32: s_mov_b32 exec_lo, [[SAVEEXEC]] -; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off -; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off - define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %out0, float addrspace(1)* %out1) #0 { +; W64-O0-LABEL: mubuf_vgpr_adjacent_in_block: +; W64-O0: ; %bb.0: ; %entry +; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: ; implicit-def: $vgpr13 +; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], 
s32 offset:48 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v13, v4 +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v6, v3 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v7, v2 +; W64-O0-NEXT: v_mov_b32_e32 v8, v1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v2, v0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v14, v5 +; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_mov_b32_e32 v15, v4 +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_mov_b32_e32 v16, v3 +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v3, v8 +; W64-O0-NEXT: v_mov_b32_e32 v4, v7 +; W64-O0-NEXT: v_mov_b32_e32 v5, v6 +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v2, v12 +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v10 +; W64-O0-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; W64-O0-NEXT: s_mov_b32 s4, 0 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 +; W64-O0-NEXT: s_mov_b32 s4, s8 +; W64-O0-NEXT: s_mov_b32 s5, s12 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] +; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 +; W64-O0-NEXT: s_mov_b32 s9, s12 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v0, s11, 6 +; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] +; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 +; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 3 +; W64-O0-NEXT: buffer_load_format_x 
v0, v0, s[8:11], s6 idxen +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execnz .LBB1_1 +; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v0, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v0, 2 +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: v_writelane_b32 v0, s4, 9 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 10 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 +; W64-O0-NEXT: s_mov_b32 s4, s8 +; W64-O0-NEXT: s_mov_b32 s5, s12 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] +; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 +; W64-O0-NEXT: s_mov_b32 s9, s12 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s8, 11 +; W64-O0-NEXT: v_writelane_b32 v0, s9, 12 +; W64-O0-NEXT: v_writelane_b32 v0, s10, 13 +; W64-O0-NEXT: v_writelane_b32 v0, s11, 14 +; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] +; W64-O0-NEXT: v_writelane_b32 v0, s4, 15 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 16 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 15 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 16 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s8, v1, 11 +; W64-O0-NEXT: v_readlane_b32 s9, v1, 12 +; W64-O0-NEXT: v_readlane_b32 s10, v1, 13 +; W64-O0-NEXT: v_readlane_b32 s11, v1, 14 +; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 3 +; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen +; W64-O0-NEXT: 
s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execnz .LBB1_4 +; W64-O0-NEXT: ; %bb.6: +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v0, 9 +; W64-O0-NEXT: v_readlane_b32 s5, v0, 10 +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: global_store_dword v[3:4], v5, off +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: global_store_dword v[0:1], v2, off +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; kill: killed $vgpr0 +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_setpc_b64 s[30:31] entry: %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1 %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) #1 @@ -131,183 +390,300 @@ entry: ret void } - -; W64-LABEL: mubuf_vgpr_outside_entry - -; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s{{[0-9]+}} -; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec - -; W64: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: -; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] -; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W64: s_xor_b64 exec, exec, [[SAVE]] -; W64: s_cbranch_execnz [[LOOPBB0]] - -; W64: s_mov_b64 exec, [[SAVEEXEC]] -; W64: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]] - -; W64: ; %bb.{{[0-9]+}}: -; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s{{[0-9]+}} -; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec - -; W64: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: -; W64-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 
s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W64-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W64: v_cmp_eq_u64_e32 vcc, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] -; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64: buffer_load_format_x [[RES]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W64: s_xor_b64 exec, exec, [[SAVE]] -; W64: s_cbranch_execnz [[LOOPBB1]] - -; W64: s_mov_b64 exec, [[SAVEEXEC]] - -; W64: [[TERMBB]]: -; W64: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off - - -; W32-LABEL: mubuf_vgpr_outside_entry - -; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4 -; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo - -; W32: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: -; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]] -; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]] -; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]] -; W32: s_cbranch_execnz [[LOOPBB0]] - -; W32: s_mov_b32 exec_lo, [[SAVEEXEC]] -; W32: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]] - -; W32: ; %bb.{{[0-9]+}}: -; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4 -; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo - -; W32: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: -; W32-DAG: v_readfirstlane_b32 s[[SRSRC0:[0-9]+]], v[[VRSRC0:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC1:[0-9]+]], v[[VRSRC1:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC2:[0-9]+]], v[[VRSRC2:[0-9]+]] -; W32-DAG: v_readfirstlane_b32 s[[SRSRC3:[0-9]+]], v[[VRSRC3:[0-9]+]] -; W32: v_cmp_eq_u64_e32 vcc_lo, s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]] -; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]] -; W32: buffer_load_format_x [[RES]], [[IDX]], s[[[SRSRC0]]:[[SRSRC3]]], 0 idxen -; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]] -; W32: s_cbranch_execnz [[LOOPBB1]] - -; W32: s_mov_b32 exec_lo, [[SAVEEXEC]] - -; W32: [[TERMBB]]: -; W32: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off - - -; Confirm spills do not occur between the XOR and branch that terminate the -; waterfall loop BBs. 
- -; W64-O0-LABEL: mubuf_vgpr_outside_entry - -; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}} -; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}} -; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill - -; W64-O0: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] -; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] -; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] -; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] -; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] -; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload -; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen -; W64-O0: s_waitcnt vmcnt(0) -; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill -; W64-O0: s_xor_b64 exec, exec, [[SAVE]] -; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB0]] - -; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]] -; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload -; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill -; W64-O0: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]] - -; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1 -; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill -; W64-O0-DAG: s_mov_b64 s[[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]], exec -; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]] -; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] - -; W64-O0: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte 
Folded Reload -; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] -; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC0]]:[[SRSRC1]]], v[[[VRSRC0]]:[[VRSRC1]]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP3:[0-9]+]], v[[VRSRC3]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC2:[0-9]+]], s[[SRSRCTMP2]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] -; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s[[[SRSRC2]]:[[SRSRC3]]], v[[[VRSRC2]]:[[VRSRC3]]] -; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] -; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] -; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] -; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload -; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen -; W64-O0: s_waitcnt vmcnt(0) -; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill -; W64-O0: s_xor_b64 exec, exec, [[SAVE]] -; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB1]] - -; W64-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]] -; W64-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]] -; W64-O0: s_mov_b64 exec, s[[[SAVEEXEC0]]:[[SAVEEXEC1]]] -; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload -; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF]] ; 4-byte Folded Spill - -; W64-O0: [[TERMBB]]: -; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF]] ; 4-byte Folded Reload -; W64-O0: global_store_dword v[{{[0-9]+:[0-9]+}}], [[RES]], off - define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %in, float addrspace(1)* %out) #0 { +; W64-O0-LABEL: mubuf_vgpr_outside_entry: +; W64-O0: ; %bb.0: ; %entry +; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: ; implicit-def: $vgpr8 +; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v8, v5 +; W64-O0-NEXT: v_mov_b32_e32 v5, v4 +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v9, v3 +; W64-O0-NEXT: v_mov_b32_e32 v10, v2 +; W64-O0-NEXT: v_mov_b32_e32 v11, v1 +; W64-O0-NEXT: v_mov_b32_e32 v5, v0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; 
implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v8 +; W64-O0-NEXT: v_mov_b32_e32 v2, v6 +; W64-O0-NEXT: v_mov_b32_e32 v3, v7 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v6, v11 +; W64-O0-NEXT: v_mov_b32_e32 v7, v10 +; W64-O0-NEXT: v_mov_b32_e32 v8, v9 +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; implicit-def: $sgpr4 +; W64-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v5, v12 +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; W64-O0-NEXT: ;;#ASMSTART +; W64-O0-NEXT: s_mov_b32 s4, 17 +; W64-O0-NEXT: ;;#ASMEND +; W64-O0-NEXT: s_mov_b32 s5, s4 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s5, 0 +; W64-O0-NEXT: s_mov_b32 s5, 0 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s4, 2 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 3 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; 
W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 +; W64-O0-NEXT: s_mov_b32 s4, s8 +; W64-O0-NEXT: s_mov_b32 s5, s12 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] +; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 +; W64-O0-NEXT: s_mov_b32 s9, s12 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s8, 4 +; W64-O0-NEXT: v_writelane_b32 v0, s9, 5 +; W64-O0-NEXT: v_writelane_b32 v0, s10, 6 +; W64-O0-NEXT: v_writelane_b32 v0, s11, 7 +; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] +; W64-O0-NEXT: v_writelane_b32 v0, s4, 8 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 9 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 8 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 9 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s8, v1, 4 +; W64-O0-NEXT: v_readlane_b32 s9, v1, 5 +; W64-O0-NEXT: v_readlane_b32 s10, v1, 6 +; W64-O0-NEXT: v_readlane_b32 s11, v1, 7 +; W64-O0-NEXT: v_readlane_b32 s6, v1, 1 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 3 +; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execnz .LBB2_1 +; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s6, v1, 2 +; W64-O0-NEXT: v_readlane_b32 s7, v1, 3 +; W64-O0-NEXT: s_mov_b64 exec, s[6:7] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s4, v1, 1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b32 s5, 0x3ff +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_and_b32_e64 v1, v1, 
s5 +; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s4, 10 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 11 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execz .LBB2_8 +; W64-O0-NEXT: ; %bb.4: ; %bb1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v0, 0 +; W64-O0-NEXT: s_mov_b32 s5, 0 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 12 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; W64-O0-NEXT: s_mov_b64 s[4:5], exec +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s4, 13 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 14 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 +; W64-O0-NEXT: s_mov_b32 s4, s8 +; W64-O0-NEXT: s_mov_b32 s5, s12 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] +; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] +; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 +; W64-O0-NEXT: s_mov_b32 s9, s12 +; W64-O0-NEXT: s_mov_b32 s10, s7 +; W64-O0-NEXT: s_mov_b32 s11, s6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v0, s8, 15 +; W64-O0-NEXT: v_writelane_b32 v0, s9, 16 +; W64-O0-NEXT: v_writelane_b32 v0, s10, 17 +; W64-O0-NEXT: 
v_writelane_b32 v0, s11, 18 +; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] +; W64-O0-NEXT: v_writelane_b32 v0, s4, 19 +; W64-O0-NEXT: v_writelane_b32 v0, s5, 20 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v1, 19 +; W64-O0-NEXT: v_readlane_b32 s5, v1, 20 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s8, v1, 15 +; W64-O0-NEXT: v_readlane_b32 s9, v1, 16 +; W64-O0-NEXT: v_readlane_b32 s10, v1, 17 +; W64-O0-NEXT: v_readlane_b32 s11, v1, 18 +; W64-O0-NEXT: v_readlane_b32 s6, v1, 12 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 3 +; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] +; W64-O0-NEXT: s_cbranch_execnz .LBB2_5 +; W64-O0-NEXT: ; %bb.7: +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v0, 13 +; W64-O0-NEXT: v_readlane_b32 s5, v0, 14 +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; W64-O0-NEXT: .LBB2_8: ; %bb2 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_readlane_b32 s4, v0, 10 +; W64-O0-NEXT: v_readlane_b32 s5, v0, 11 +; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: global_store_dword v[0:1], v2, off +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: ; kill: killed $vgpr0 +; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_setpc_b64 s[30:31] entry: %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1 @@ -330,3 +706,6 @@ declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i3 attributes 
#0 = { nounwind } attributes #1 = { nounwind readonly } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; W32: {{.*}} +; W64: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir index 60327e5fad375..6437a8af0fa8a 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -30,20 +30,20 @@ body: | ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W64-NEXT: {{ $}} - ; W64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; W64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -64,33 +64,33 @@ body: | ; W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} - ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: ; W64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; W64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_IDXEN]] + ; W64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W64-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_IDXEN]] ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W32-LABEL: name: idxen ; W32: successors: %bb.1(0x80000000) ; W32-NEXT: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W32-NEXT: {{ $}} - ; W32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; W32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W32-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -111,25 +111,25 @@ body: | ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} - ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: ; W32-NEXT: $exec_lo = S_MOV_B32 [[S_MOV_B32_]] - ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_IDXEN]] + ; W32-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W32-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_IDXEN]] ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %5:sreg_64 = COPY $sgpr30_sgpr31 - %4:vgpr_32 = COPY $vgpr4 - %3:vgpr_32 = COPY $vgpr3 - %2:vgpr_32 = COPY $vgpr2 - %1:vgpr_32 = COPY $vgpr1 - %0:vgpr_32 = COPY $vgpr0 + %5:sreg_64 = PRED_COPY $sgpr30_sgpr31 + %4:vgpr_32 = PRED_COPY $vgpr4 + %3:vgpr_32 = PRED_COPY $vgpr3 + %2:vgpr_32 = PRED_COPY $vgpr2 + %1:vgpr_32 = PRED_COPY $vgpr1 + %0:vgpr_32 = PRED_COPY $vgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec - $sgpr30_sgpr31 = COPY %5 - $vgpr0 = COPY %7 + 
$sgpr30_sgpr31 = PRED_COPY %5 + $vgpr0 = PRED_COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -150,20 +150,20 @@ body: | ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W64-NEXT: {{ $}} - ; W64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; W64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY3]], implicit $exec + ; W64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, killed [[PRED_COPY6]], %subreg.sub1, killed [[PRED_COPY7]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -184,33 +184,33 @@ body: | ; W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} - ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: ; W64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; W64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; W64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W64-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W32-LABEL: name: offen ; W32: successors: %bb.1(0x80000000) ; W32-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W32-NEXT: {{ $}} - ; W32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; W32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - 
; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr4 + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W32-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY3]], implicit $exec + ; W32-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[PRED_COPY5]], %subreg.sub0, killed [[PRED_COPY6]], %subreg.sub1, killed [[PRED_COPY7]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -231,25 +231,25 @@ body: | ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} - ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: ; W32-NEXT: $exec_lo = S_MOV_B32 [[S_MOV_B32_]] - ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; W32-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W32-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %5:sreg_64 = COPY $sgpr30_sgpr31 - %4:vgpr_32 = COPY $vgpr4 - %3:vgpr_32 = COPY $vgpr3 - %2:vgpr_32 = COPY $vgpr2 - %1:vgpr_32 = COPY $vgpr1 - %0:vgpr_32 = COPY $vgpr0 + %5:sreg_64 = PRED_COPY $sgpr30_sgpr31 + %4:vgpr_32 = PRED_COPY $vgpr4 + %3:vgpr_32 = PRED_COPY $vgpr3 + %2:vgpr_32 = PRED_COPY $vgpr2 + %1:vgpr_32 = PRED_COPY $vgpr1 + %0:vgpr_32 = PRED_COPY $vgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec - $sgpr30_sgpr31 = COPY %5 - $vgpr0 = COPY %7 + $sgpr30_sgpr31 = PRED_COPY %5 + $vgpr0 = PRED_COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -270,20 +270,20 @@ body: | ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W64-NEXT: {{ $}} - ; W64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; W64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY3]], implicit $exec + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, killed [[PRED_COPY8]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: @@ -304,33 +304,33 @@ body: | ; W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} - ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: ; W64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; W64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; W64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W64-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W32-LABEL: name: bothen ; W32: successors: %bb.1(0x80000000) ; W32-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W32-NEXT: {{ $}} - ; W32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W32-NEXT: 
[[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W32-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY3]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, killed [[PRED_COPY8]], %subreg.sub2, [[PRED_COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -351,25 +351,25 @@ body: | ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} - ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec + ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[PRED_COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: ; W32-NEXT: $exec_lo = S_MOV_B32 [[S_MOV_B32_]] - ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; W32-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W32-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %5:sreg_64 = COPY $sgpr30_sgpr31 - %4:vreg_64 = COPY $vgpr4_vgpr5 - %3:vgpr_32 = COPY $vgpr3 - %2:vgpr_32 = COPY $vgpr2 - %1:vgpr_32 = COPY $vgpr1 - %0:vgpr_32 = COPY $vgpr0 + %5:sreg_64 = PRED_COPY $sgpr30_sgpr31 + %4:vreg_64 = PRED_COPY $vgpr4_vgpr5 + %3:vgpr_32 = PRED_COPY $vgpr3 + %2:vgpr_32 = PRED_COPY $vgpr2 + %1:vgpr_32 = PRED_COPY $vgpr1 + %0:vgpr_32 = PRED_COPY $vgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec - $sgpr30_sgpr31 = COPY %5 - $vgpr0 = COPY %7 + $sgpr30_sgpr31 = PRED_COPY %5 + $vgpr0 = PRED_COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
@@ -389,71 +389,71 @@ body: | ; ADDR64-LABEL: name: addr64 ; ADDR64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; ADDR64-NEXT: {{ $}} - ; ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; ADDR64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; ADDR64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; ADDR64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; ADDR64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; ADDR64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 + ; ADDR64-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY9]].sub0, [[COPY1]].sub0, 0, implicit $exec - ; ADDR64-NEXT: %17:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[COPY9]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; ADDR64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[PRED_COPY9]].sub0, [[PRED_COPY1]].sub0, 0, implicit $exec + ; ADDR64-NEXT: %17:vgpr_32, dead %20:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY9]].sub1, [[PRED_COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %17, %subreg.sub1 ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec - ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; 
ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; ADDR64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; ADDR64-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] ; ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W32-LABEL: name: addr64 ; W32: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W32-NEXT: {{ $}} - ; W32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W32-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; W32-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; W32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; W32-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 822173696 ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; W32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY9]].sub0, [[COPY1]].sub0, 0, implicit $exec - ; W32-NEXT: %17:vgpr_32, dead %20:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; W32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[PRED_COPY9]].sub0, [[PRED_COPY1]].sub0, 0, implicit $exec + ; W32-NEXT: %17:vgpr_32, dead %20:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[PRED_COPY9]].sub1, [[PRED_COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %17, %subreg.sub1 ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 
[[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec - ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; W32-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W32-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %5:sreg_64 = COPY $sgpr30_sgpr31 - %4:vreg_64 = COPY $vgpr4_vgpr5 - %3:vgpr_32 = COPY $vgpr3 - %2:vgpr_32 = COPY $vgpr2 - %1:vgpr_32 = COPY $vgpr1 - %0:vgpr_32 = COPY $vgpr0 + %5:sreg_64 = PRED_COPY $sgpr30_sgpr31 + %4:vreg_64 = PRED_COPY $vgpr4_vgpr5 + %3:vgpr_32 = PRED_COPY $vgpr3 + %2:vgpr_32 = PRED_COPY $vgpr2 + %1:vgpr_32 = PRED_COPY $vgpr1 + %0:vgpr_32 = PRED_COPY $vgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, implicit $exec - $sgpr30_sgpr31 = COPY %5 - $vgpr0 = COPY %7 + $sgpr30_sgpr31 = PRED_COPY %5 + $vgpr0 = PRED_COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... @@ -474,48 +474,48 @@ body: | ; ADDR64-LABEL: name: offset ; ADDR64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; ADDR64-NEXT: {{ $}} - ; ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; ADDR64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; ADDR64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; ADDR64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; ADDR64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; ADDR64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; ADDR64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 + ; ADDR64-NEXT: [[PRED_COPY9:%[0-9]+]]:vreg_64 = PRED_COPY [[REG_SEQUENCE]].sub0_sub1 ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], 
%subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]].sub0, %subreg.sub0, [[COPY9]].sub1, %subreg.sub1 + ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[PRED_COPY9]].sub0, %subreg.sub0, [[PRED_COPY9]].sub1, %subreg.sub1 ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec - ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; ADDR64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; ADDR64-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] ; ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W64-NO-ADDR64-LABEL: name: offset ; W64-NO-ADDR64: successors: %bb.1(0x80000000) ; W64-NO-ADDR64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W64-NO-ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W64-NO-ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W64-NO-ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W64-NO-ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W64-NO-ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W64-NO-ADDR64-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NO-ADDR64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NO-ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W64-NO-ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W64-NO-ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W64-NO-ADDR64-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W64-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NO-ADDR64-NEXT: {{ $}} ; W64-NO-ADDR64-NEXT: .1: @@ -542,27 +542,27 @@ body: | ; W64-NO-ADDR64-NEXT: {{ $}} ; W64-NO-ADDR64-NEXT: .3: ; W64-NO-ADDR64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; W64-NO-ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W64-NO-ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] + ; W64-NO-ADDR64-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W64-NO-ADDR64-NEXT: $vgpr0 = 
PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] ; W64-NO-ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; W32-LABEL: name: offset ; W32: successors: %bb.1(0x80000000) ; W32-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; W32-NEXT: {{ $}} - ; W32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W32-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W32-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; W32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W32-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $sgpr30_sgpr31 + ; W32-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_64 = PRED_COPY $vgpr4_vgpr5 + ; W32-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr3 + ; W32-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr2 + ; W32-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr1 + ; W32-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0 ; W32-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec - ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec - ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec - ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; W32-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY5]], implicit $exec + ; W32-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY4]], implicit $exec + ; W32-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[PRED_COPY2]], implicit $exec + ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[PRED_COPY6]], %subreg.sub0, killed [[PRED_COPY7]], %subreg.sub1, [[PRED_COPY3]], %subreg.sub2, killed [[PRED_COPY8]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: @@ -589,18 +589,18 @@ body: | ; W32-NEXT: {{ $}} ; W32-NEXT: .3: ; W32-NEXT: $exec_lo = S_MOV_B32 [[S_MOV_B32_]] - ; W32-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W32-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] + ; W32-NEXT: $sgpr30_sgpr31 = PRED_COPY [[PRED_COPY]] + ; W32-NEXT: $vgpr0 = PRED_COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - %5:sreg_64 = COPY $sgpr30_sgpr31 - %4:vreg_64 = COPY $vgpr4_vgpr5 - %3:vgpr_32 = COPY $vgpr3 - %2:vgpr_32 = COPY $vgpr2 - %1:vgpr_32 = COPY $vgpr1 - %0:vgpr_32 = COPY $vgpr0 + %5:sreg_64 = PRED_COPY $sgpr30_sgpr31 + %4:vreg_64 = PRED_COPY $vgpr4_vgpr5 + %3:vgpr_32 = PRED_COPY $vgpr3 + %2:vgpr_32 = PRED_COPY $vgpr2 + %1:vgpr_32 = PRED_COPY $vgpr1 + %0:vgpr_32 = PRED_COPY $vgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, implicit $exec - $sgpr30_sgpr31 = COPY %5 - $vgpr0 = COPY %7 + $sgpr30_sgpr31 = PRED_COPY %5 + $vgpr0 = PRED_COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index c36931ba798b7..12ab6e9b89876 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -188,11 +188,12 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-LABEL: slsr1_1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 5 +; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s4, 5 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -228,11 +229,12 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: v_readlane_b32 s37, v40, 2 ; GFX9-NEXT: v_readlane_b32 s36, v40, 1 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: v_readlane_b32 s4, v40, 5 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 5 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %b = and i32 %b.arg, 16777215 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll index 97a8a62ef00ba..c72c65a6d62e1 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll @@ -27,11 +27,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-LABEL: csr_vgpr_spill_fp_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v1, s33, 2 +; CHECK-NEXT: s_mov_b32 s6, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_writelane_b32 v1, s30, 0 @@ -46,11 +46,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 -; CHECK-NEXT: s_addk_i32 s32, 0xfc00 -; CHECK-NEXT: v_readlane_b32 s33, v1, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; 
CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_addk_i32 s32, 0xfc00 +; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: @@ -82,7 +82,7 @@ define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 { ; CHECK-LABEL: csr_vgpr_spill_fp_tailcall_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill @@ -95,7 +95,7 @@ define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 { ; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12 ; CHECK-NEXT: v_readlane_b32 s33, v1, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_setpc_b64 s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 4e59d7dee1b9e..ba2a4303d23e3 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -27,11 +27,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-LABEL: csr_vgpr_spill_fp_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v1, s33, 2 +; CHECK-NEXT: s_mov_b32 s6, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_writelane_b32 v1, s30, 0 @@ -50,11 +50,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: v_readlane_b32 s33, v1, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: @@ -90,7 +90,7 @@ define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 { ; CHECK-LABEL: csr_vgpr_spill_fp_tailcall_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill @@ -103,7 +103,7 @@ 
define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 { ; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12 ; CHECK-NEXT: v_readlane_b32 s33, v1, 0 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_setpc_b64 s[4:5] @@ -152,11 +152,11 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v1, s33, 2 +; CHECK-NEXT: s_mov_b32 s6, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v1, s30, 0 ; CHECK-NEXT: v_writelane_b32 v1, s31, 1 @@ -170,11 +170,11 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 -; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: v_readlane_b32 s33, v1, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -186,11 +186,11 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-LABEL: caller_save_vgpr_spill_fp: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v2, s33, 2 +; CHECK-NEXT: s_mov_b32 s7, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v2, s30, 0 ; CHECK-NEXT: v_writelane_b32 v2, s31, 1 @@ -204,11 +204,11 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: v_readlane_b32 s30, v2, 0 ; CHECK-NEXT: v_readlane_b32 s31, v2, 1 -; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: v_readlane_b32 s33, v2, 2 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; CHECK-NEXT: s_mov_b32 s33, s7 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 
d7cb136fd5e09..cfeb1ab676c7f 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -11,12 +11,12 @@ declare void @external_void_func_i32(i32) #0 ; GCN: s_waitcnt ; Spill CSR VGPR used for SGPR spilling -; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-DAG: v_writelane_b32 v40, s33, 2 -; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_addk_i32 s32, 0x400 +; GCN-DAG: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2 ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 @@ -25,11 +25,12 @@ declare void @external_void_func_i32(i32) #0 ; GCN: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 +; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] +; GCN-NEXT: s_addk_i32 s32, 0xfc00 +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_i32_imm() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll index de6742d0b3ae4..6cb4c576be6d0 100644 --- a/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll +++ b/llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s ; CHECK: in function pixel_s{{.*}}: unsupported non-compute shaders with HSA define amdgpu_ps void @pixel_shader() #0 { @@ -14,3 +14,6 @@ define amdgpu_vs void @vertex_shader() #0 { define amdgpu_gs void @geometry_shader() #0 { ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index e197f728054c6..0adc2cebdb2ca 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -189,13 +189,14 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: .cfi_undefined 60 ; CHECK-NEXT: .cfi_undefined 61 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: .cfi_offset 2600, 0 -; CHECK-NEXT: s_mov_b64 exec, s[16:17] -; CHECK-NEXT: v_writelane_b32 v40, s33, 2 +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v40, s16, 2 ; CHECK-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa8, 0x14, 
0xe4, 0x08 ; -; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: .Ltmp0: @@ -217,12 +218,13 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: v_readlane_b32 s4, v40, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: v_readlane_b32 s33, v40, 2 ; CHECK-NEXT: .cfi_def_cfa_register 64 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index 9392f680dfa06..131dba575b86c 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -1,10 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s + +; XFAIL: * + +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,MUBUF %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch | FileCheck -check-prefixes=DEFAULTSIZE,FLATSCR %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=ASSUME1024,FLATSCR %s ; FIXME: Generated test checks do not 
check metadata at the end of the ; function, so this also includes manually added checks. @@ -51,6 +54,42 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_endpgm ; +; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: +; DEFAULTSIZE-V5: ; %bb.0: ; %entry +; DEFAULTSIZE-V5-NEXT: s_add_u32 s0, s0, s9 +; DEFAULTSIZE-V5-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8 +; DEFAULTSIZE-V5-NEXT: s_addc_u32 s1, s1, 0 +; DEFAULTSIZE-V5-NEXT: s_movk_i32 s32, 0x400 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, 0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s8, 0 +; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB0_3 +; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0 +; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s9, 0 +; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB0_3 +; DEFAULTSIZE-V5-NEXT: ; %bb.2: ; %bb.1 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000 +; DEFAULTSIZE-V5-NEXT: s_lshl_b32 s7, s10, 2 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v1, 0 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, 1 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s6, s7 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6 +; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v0, v2, v0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: global_store_dword v1, v0, s[4:5] +; DEFAULTSIZE-V5-NEXT: .LBB0_3: ; %bb.2 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_endpgm +; ; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 @@ -110,8 +149,8 @@ bb.2: store volatile i32 0, i32 addrspace(1)* undef ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 -; DEFAULTSIZE: ; ScratchSize: 4112 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16 +; DEFAULTSIZE: ; ScratchSize: 16 ; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 16 ; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1 ; DEFAULTSIZE-V5: ; ScratchSize: 16 @@ -154,6 +193,40 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_endpgm ; +; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: +; DEFAULTSIZE-V5: ; %bb.0: ; %entry +; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 +; DEFAULTSIZE-V5-NEXT: s_add_u32 s0, s0, s9 +; DEFAULTSIZE-V5-NEXT: s_addc_u32 s1, s1, 0 +; DEFAULTSIZE-V5-NEXT: s_movk_i32 s32, 0x1000 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, 0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_cmp_lg_u32 s6, 0 +; DEFAULTSIZE-V5-NEXT: s_cbranch_scc1 .LBB1_2 +; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000 +; DEFAULTSIZE-V5-NEXT: s_and_b32 s6, s6, 0xfffff000 +; DEFAULTSIZE-V5-NEXT: s_lshl_b32 s7, s7, 2 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v1, 0 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, 1 +; 
DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s6, s7 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, s6 +; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v0, v2, v0 +; DEFAULTSIZE-V5-NEXT: s_waitcnt lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: global_store_dword v1, v0, s[4:5] +; DEFAULTSIZE-V5-NEXT: .LBB1_2: ; %bb.1 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_endpgm +; ; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 @@ -206,8 +279,8 @@ bb.1: ret void } -; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160 -; DEFAULTSIZE: ; ScratchSize: 4160 +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64 +; DEFAULTSIZE: ; ScratchSize: 64 ; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 64 ; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1 ; DEFAULTSIZE-V5: ; ScratchSize: 64 @@ -253,6 +326,42 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i ; MUBUF-NEXT: s_mov_b32 s33, s7 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; +; DEFAULTSIZE-V5-LABEL: func_non_entry_block_static_alloca_align4: +; DEFAULTSIZE-V5: ; %bb.0: ; %entry +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s7, s33 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s32 +; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0x400 +; DEFAULTSIZE-V5-NEXT: s_and_saveexec_b64 s[4:5], vcc +; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB2_3 +; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0 +; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; DEFAULTSIZE-V5-NEXT: s_and_b64 exec, exec, vcc +; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB2_3 +; DEFAULTSIZE-V5-NEXT: ; %bb.2: ; %bb.1 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 0 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v3, s6 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 1 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4 +; DEFAULTSIZE-V5-NEXT: v_lshl_add_u32 v2, v4, 2, s6 +; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: v_and_b32_e32 v3, 0x3ff, v31 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6 +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v2, v2, v3 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v2, off +; DEFAULTSIZE-V5-NEXT: .LBB2_3: ; %bb.2 +; DEFAULTSIZE-V5-NEXT: s_or_b64 exec, exec, s[4:5] +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0xfc00 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s7 +; DEFAULTSIZE-V5-NEXT: s_setpc_b64 s[30:31] +; ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4: ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -349,6 +458,40 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, ; MUBUF-NEXT: s_mov_b32 s33, s7 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; +; DEFAULTSIZE-V5-LABEL: 
func_non_entry_block_static_alloca_align64: +; DEFAULTSIZE-V5: ; %bb.0: ; %entry +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s7, s33 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s33, s32, 0xfc0 +; DEFAULTSIZE-V5-NEXT: s_and_b32 s33, s33, 0xfffff000 +; DEFAULTSIZE-V5-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0x2000 +; DEFAULTSIZE-V5-NEXT: s_and_saveexec_b64 s[4:5], vcc +; DEFAULTSIZE-V5-NEXT: s_cbranch_execz .LBB3_2 +; DEFAULTSIZE-V5-NEXT: ; %bb.1: ; %bb.0 +; DEFAULTSIZE-V5-NEXT: s_add_i32 s6, s32, 0x1000 +; DEFAULTSIZE-V5-NEXT: s_and_b32 s6, s6, 0xfffff000 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 0 +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v4, s6 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v2, 1 +; DEFAULTSIZE-V5-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:4 +; DEFAULTSIZE-V5-NEXT: v_lshl_add_u32 v2, v3, 2, s6 +; DEFAULTSIZE-V5-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; DEFAULTSIZE-V5-NEXT: v_and_b32_e32 v3, 0x3ff, v31 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s32, s6 +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: v_add_u32_e32 v2, v2, v3 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v2, off +; DEFAULTSIZE-V5-NEXT: .LBB3_2: ; %bb.1 +; DEFAULTSIZE-V5-NEXT: s_or_b64 exec, exec, s[4:5] +; DEFAULTSIZE-V5-NEXT: v_mov_b32_e32 v0, 0 +; DEFAULTSIZE-V5-NEXT: global_store_dword v[0:1], v0, off +; DEFAULTSIZE-V5-NEXT: s_waitcnt vmcnt(0) +; DEFAULTSIZE-V5-NEXT: s_addk_i32 s32, 0xe000 +; DEFAULTSIZE-V5-NEXT: s_mov_b32 s33, s7 +; DEFAULTSIZE-V5-NEXT: s_setpc_b64 s[30:31] +; ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align64: ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -406,3 +549,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; ASSUME1024: {{.*}} +; DEFAULTSIZE: {{.*}} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index 691279f933e56..2be5ec2d37953 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : ; CHECK: s_endpgm @@ -85,3 +85,6 @@ define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) ali entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir index 404529d8c69ec..aa6b0ee477aba 100644 --- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir @@ -34,7 +34,7 @@ body: | %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) - %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7) S_ENDPGM 0, implicit %7 ... @@ -67,6 +67,6 @@ body: | %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6 - %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7) S_ENDPGM 0, implicit %7 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir index 0045fa68c4f09..58c9522b67726 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir @@ -95,10 +95,10 @@ body: | ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_LOAD_DWORDX2_IMM1]].sub0, [[REG_SEQUENCE]].sub0, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY3]], implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[PRED_COPY]], implicit-def $vcc, implicit $vcc, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576 ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 @@ -372,10 +372,10 @@ body: | ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_LOAD_DWORDX2_IMM1]].sub0, [[REG_SEQUENCE]].sub0, implicit-def $vcc, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1 - ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY3]], implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub1 + ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[PRED_COPY]], implicit-def $vcc, implicit $vcc, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[V_ADD_CO_U32_e32_]], implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[V_ADDC_U32_e32_]], %subreg.sub1 ; GCN-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 killed [[REG_SEQUENCE1]].sub0, 12, implicit $exec ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir 
b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir index ca7be92a443df..3a848d83bd08f 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir @@ -18,7 +18,7 @@ body: | ; CHECK-NEXT: renamable $vcc = V_CMP_EQ_U32_e64 0, killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc - ; CHECK-NEXT: renamable $sgpr0_sgpr1 = COPY killed renamable $sgpr0_sgpr1, implicit $exec + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = PRED_COPY killed renamable $sgpr0_sgpr1, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir index b864f803e1041..b93de09748ddb 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = COPY killed renamable $sgpr4_sgpr5 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr6_sgpr7, implicit $exec - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr8_sgpr9, implicit $exec + ; CHECK-NEXT: $exec = PRED_COPY killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = PRED_COPY killed renamable $sgpr6_sgpr7, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr8_sgpr9, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir index 3e36c46c47ead..8c4fd1f3aa3be 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -137,7 +137,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -175,7 +175,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -212,7 +212,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -242,7 +242,7 @@ body: | # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: 
$exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_valu_middle liveins: @@ -251,7 +251,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -282,7 +282,7 @@ body: | # CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}} # CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1 +# CHECK-NEXT: $exec = PRED_COPY $sgpr0_sgpr1 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_wrong_reg liveins: @@ -293,7 +293,7 @@ body: | $sgpr6 = S_MOV_B32 -1 $sgpr7 = S_MOV_B32 61440 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -321,7 +321,7 @@ body: | # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_and_saveexec_xor_modify_copy_to_exec @@ -331,7 +331,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -364,7 +364,7 @@ body: | # CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}} # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ name: optimize_if_and_saveexec_xor_live_out_setexec liveins: @@ -373,7 +373,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -401,9 +401,9 @@ body: | ... 
# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}} -# CHECK: $sgpr0_sgpr1 = COPY $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec # CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_unknown_saveexec @@ -413,7 +413,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc @@ -450,7 +450,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc @@ -478,7 +478,7 @@ body: | --- # CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} # CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc -# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 +# CHECK-NEXT: $exec = PRED_COPY killed $sgpr2_sgpr3 # CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec name: optimize_if_andn2_saveexec_no_commute liveins: @@ -487,7 +487,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc @@ -515,7 +515,7 @@ body: | --- # A read from exec copy subreg prevents optimization # CHECK-LABEL: name: if_and_xor_read_exec_copy_subreg{{$}} -# CHECK: $sgpr0_sgpr1 = COPY $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec # CHECK-NEXT: $sgpr4 = S_MOV_B32 $sgpr1 name: if_and_xor_read_exec_copy_subreg liveins: @@ -524,7 +524,7 @@ body: | bb.0.main_body: liveins: $vgpr0 - $sgpr0_sgpr1 = COPY $exec + $sgpr0_sgpr1 = PRED_COPY $exec $sgpr4 = S_MOV_B32 $sgpr1 $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/optimize-kill.mir b/llvm/test/CodeGen/AMDGPU/optimize-kill.mir new file mode 100644 index 0000000000000..ab1bd3bc1a40d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/optimize-kill.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,1 -stop-after=virtregrewriter,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# The virtregrewriter pass should not insert a KILL when it finds an identical copy with an implicit reserved register. +# For the following instruction, +# %4:vgpr_32 = COPY %3:vgpr_32, implicit $exec +# if the register allocated for both the source and destination operands remains the same (vgpr0, for instance), there is +# no need to insert the KILL instruction, as the implicit operand (exec) is a reserved register.
+# $vgpr0 = KILL killed renamable $vgpr0, implicit $exec +--- +name: test +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GCN-LABEL: name: test + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $exec + ; GCN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc + ; GCN-NEXT: renamable $vgpr0 = COPY renamable $sgpr0 + ; GCN-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_IDXEN killed renamable $vgpr0, undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $exec = S_AND_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc + ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + renamable $sgpr2_sgpr3 = COPY $exec + $exec = S_WQM_B64 $exec, implicit-def $scc + %2:vgpr_32 = COPY killed renamable $sgpr0 + %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + %4:vgpr_32 = COPY %3:vgpr_32, implicit $exec + $exec = S_AND_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc + $vgpr0 = COPY %4 + SI_RETURN_TO_EPILOG killed $vgpr0 +... diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index a1148646b2cc3..f6fbbfebb1af8 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -12,20 +12,20 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX908-NEXT: {{ $}} ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %26 - ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26 + ; REGALLOC-GFX908-NEXT: [[PRED_COPY:%[0-9]+]]:av_128 = PRED_COPY %26 ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def %23 ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[PRED_COPY]] + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[PRED_COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; REGALLOC-GFX908-NEXT: [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX908-NEXT: [[PRED_COPY2:%[0-9]+]]:areg_128 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit 
$mode, implicit $exec + ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY2]], 0, 0, 0, implicit $mode, implicit $exec ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[PRED_COPY3:%[0-9]+]]:vreg_128 = PRED_COPY [[V_MFMA_I32_4X4X4I8_e64_]] + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[PRED_COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: S_ENDPGM 0 ; PEI-GFX908-LABEL: name: partial_copy ; PEI-GFX908: bb.0 (%ir-block.0): @@ -38,21 +38,21 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 - ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1 ; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 - ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) ; PEI-GFX908-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec - ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; PEI-GFX908-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed 
$vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; PEI-GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) - ; PEI-GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1 + ; PEI-GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1 ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = PRED_COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; PEI-GFX908-NEXT: S_ENDPGM 0 ; REGALLOC-GFX90A-LABEL: name: partial_copy @@ -61,15 +61,15 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX90A-NEXT: {{ $}} ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32 ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %25 - ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25 + ; REGALLOC-GFX90A-NEXT: [[PRED_COPY:%[0-9]+]]:av_128_align2 = PRED_COPY %25 ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def %23 ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[PRED_COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) - ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX90A-NEXT: [[PRED_COPY1:%[0-9]+]]:areg_128_align2 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec + ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; REGALLOC-GFX90A-NEXT: 
GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) @@ -85,18 +85,18 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 - ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) - ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; PEI-GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec ; PEI-GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; PEI-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) - ; PEI-GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1 + ; PEI-GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1 ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; PEI-GFX90A-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 3b8cf0c31e575..e866ecfae33c4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,95 +11,107 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { ; GCN-LABEL: spill_sgprs_to_multiple_vgprs: ; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s92, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s93, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s94, -1 +; GCN-NEXT: s_mov_b32 s95, 0xe8f000 +; GCN-NEXT: s_add_u32 s92, s92, s3 +; GCN-NEXT: s_addc_u32 s93, s93, 0 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: ; implicit-def: $vgpr1 +; GCN-NEXT: ; implicit-def: $vgpr2 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 8 -; GCN-NEXT: v_writelane_b32 v0, s5, 9 -; GCN-NEXT: v_writelane_b32 v0, s6, 10 -; GCN-NEXT: v_writelane_b32 v0, s7, 11 -; GCN-NEXT: v_writelane_b32 v0, s8, 12 -; GCN-NEXT: v_writelane_b32 v0, s9, 13 -; GCN-NEXT: v_writelane_b32 v0, s10, 14 -; GCN-NEXT: v_writelane_b32 v0, s11, 15 +; GCN-NEXT: v_writelane_b32 v2, s4, 8 +; GCN-NEXT: v_writelane_b32 v2, s5, 9 +; GCN-NEXT: v_writelane_b32 v2, s6, 10 +; GCN-NEXT: v_writelane_b32 v2, s7, 11 +; GCN-NEXT: v_writelane_b32 v2, s8, 12 +; GCN-NEXT: v_writelane_b32 v2, s9, 13 +; GCN-NEXT: v_writelane_b32 v2, s10, 14 +; GCN-NEXT: v_writelane_b32 v2, s11, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: v_writelane_b32 v2, s4, 16 +; GCN-NEXT: v_writelane_b32 v2, s5, 17 +; GCN-NEXT: v_writelane_b32 v2, s6, 18 +; GCN-NEXT: v_writelane_b32 v2, s7, 19 +; GCN-NEXT: v_writelane_b32 v2, s8, 20 +; GCN-NEXT: v_writelane_b32 v2, s9, 21 +; GCN-NEXT: v_writelane_b32 v2, s10, 22 +; GCN-NEXT: v_writelane_b32 v2, s11, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 24 -; GCN-NEXT: v_writelane_b32 v0, s5, 25 -; GCN-NEXT: v_writelane_b32 v0, s6, 26 -; GCN-NEXT: v_writelane_b32 v0, s7, 27 -; GCN-NEXT: v_writelane_b32 v0, s8, 28 -; GCN-NEXT: v_writelane_b32 v0, s9, 29 -; GCN-NEXT: v_writelane_b32 v0, s10, 30 -; GCN-NEXT: v_writelane_b32 v0, s11, 31 +; GCN-NEXT: v_writelane_b32 v2, s4, 24 +; GCN-NEXT: v_writelane_b32 v2, s5, 25 +; GCN-NEXT: v_writelane_b32 v2, s6, 26 +; GCN-NEXT: v_writelane_b32 v2, s7, 27 +; GCN-NEXT: v_writelane_b32 v2, s8, 28 +; GCN-NEXT: v_writelane_b32 v2, s9, 29 +; GCN-NEXT: v_writelane_b32 v2, s10, 30 +; GCN-NEXT: v_writelane_b32 v2, s11, 31 ; GCN-NEXT: ;;#ASMSTART ; 
GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 32 -; GCN-NEXT: v_writelane_b32 v0, s5, 33 -; GCN-NEXT: v_writelane_b32 v0, s6, 34 -; GCN-NEXT: v_writelane_b32 v0, s7, 35 -; GCN-NEXT: v_writelane_b32 v0, s8, 36 -; GCN-NEXT: v_writelane_b32 v0, s9, 37 -; GCN-NEXT: v_writelane_b32 v0, s10, 38 -; GCN-NEXT: v_writelane_b32 v0, s11, 39 +; GCN-NEXT: v_writelane_b32 v2, s4, 32 +; GCN-NEXT: v_writelane_b32 v2, s5, 33 +; GCN-NEXT: v_writelane_b32 v2, s6, 34 +; GCN-NEXT: v_writelane_b32 v2, s7, 35 +; GCN-NEXT: v_writelane_b32 v2, s8, 36 +; GCN-NEXT: v_writelane_b32 v2, s9, 37 +; GCN-NEXT: v_writelane_b32 v2, s10, 38 +; GCN-NEXT: v_writelane_b32 v2, s11, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 40 -; GCN-NEXT: v_writelane_b32 v0, s5, 41 -; GCN-NEXT: v_writelane_b32 v0, s6, 42 -; GCN-NEXT: v_writelane_b32 v0, s7, 43 -; GCN-NEXT: v_writelane_b32 v0, s8, 44 -; GCN-NEXT: v_writelane_b32 v0, s9, 45 -; GCN-NEXT: v_writelane_b32 v0, s10, 46 -; GCN-NEXT: v_writelane_b32 v0, s11, 47 +; GCN-NEXT: v_writelane_b32 v2, s4, 40 +; GCN-NEXT: v_writelane_b32 v2, s5, 41 +; GCN-NEXT: v_writelane_b32 v2, s6, 42 +; GCN-NEXT: v_writelane_b32 v2, s7, 43 +; GCN-NEXT: v_writelane_b32 v2, s8, 44 +; GCN-NEXT: v_writelane_b32 v2, s9, 45 +; GCN-NEXT: v_writelane_b32 v2, s10, 46 +; GCN-NEXT: v_writelane_b32 v2, s11, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v2, s4, 48 +; GCN-NEXT: v_writelane_b32 v2, s5, 49 +; GCN-NEXT: v_writelane_b32 v2, s6, 50 +; GCN-NEXT: v_writelane_b32 v2, s7, 51 +; GCN-NEXT: v_writelane_b32 v2, s8, 52 +; GCN-NEXT: v_writelane_b32 v2, s9, 53 +; GCN-NEXT: v_writelane_b32 v2, s10, 54 +; GCN-NEXT: v_writelane_b32 v2, s11, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 56 -; GCN-NEXT: v_writelane_b32 v0, s5, 57 -; GCN-NEXT: v_writelane_b32 v0, s6, 58 -; GCN-NEXT: v_writelane_b32 v0, s7, 59 -; GCN-NEXT: v_writelane_b32 v0, s8, 60 -; GCN-NEXT: v_writelane_b32 v0, s9, 61 -; GCN-NEXT: v_writelane_b32 v0, s10, 62 -; GCN-NEXT: v_writelane_b32 v0, s11, 63 +; GCN-NEXT: v_writelane_b32 v2, s4, 56 +; GCN-NEXT: v_writelane_b32 v2, s5, 57 +; GCN-NEXT: v_writelane_b32 v2, s6, 58 +; GCN-NEXT: v_writelane_b32 v2, s7, 59 +; GCN-NEXT: v_writelane_b32 v2, s8, 60 +; GCN-NEXT: v_writelane_b32 v2, s9, 61 +; GCN-NEXT: v_writelane_b32 v2, s10, 62 +; GCN-NEXT: v_writelane_b32 v2, s11, 63 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND @@ -188,176 +200,192 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: v_writelane_b32 v1, s9, 61 ; GCN-NEXT: v_writelane_b32 v1, s10, 62 ; GCN-NEXT: v_writelane_b32 v1, s11, 63 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: 
v_writelane_b32 v2, s4, 0 -; GCN-NEXT: v_writelane_b32 v2, s5, 1 -; GCN-NEXT: v_writelane_b32 v2, s6, 2 -; GCN-NEXT: v_writelane_b32 v2, s7, 3 -; GCN-NEXT: v_writelane_b32 v2, s8, 4 -; GCN-NEXT: v_writelane_b32 v2, s9, 5 -; GCN-NEXT: v_writelane_b32 v2, s10, 6 -; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s8, v1, 56 -; GCN-NEXT: v_readlane_b32 s9, v1, 57 -; GCN-NEXT: v_readlane_b32 s10, v1, 58 -; GCN-NEXT: v_readlane_b32 s11, v1, 59 -; GCN-NEXT: v_readlane_b32 s12, v1, 60 -; GCN-NEXT: v_readlane_b32 s13, v1, 61 -; GCN-NEXT: v_readlane_b32 s14, v1, 62 -; GCN-NEXT: v_readlane_b32 s15, v1, 63 -; GCN-NEXT: v_readlane_b32 s16, v1, 48 -; GCN-NEXT: v_readlane_b32 s17, v1, 49 -; GCN-NEXT: v_readlane_b32 s18, v1, 50 -; GCN-NEXT: v_readlane_b32 s19, v1, 51 -; GCN-NEXT: v_readlane_b32 s20, v1, 52 -; GCN-NEXT: v_readlane_b32 s21, v1, 53 -; GCN-NEXT: v_readlane_b32 s22, v1, 54 -; GCN-NEXT: v_readlane_b32 s23, v1, 55 -; GCN-NEXT: v_readlane_b32 s24, v1, 40 -; GCN-NEXT: v_readlane_b32 s25, v1, 41 -; GCN-NEXT: v_readlane_b32 s26, v1, 42 -; GCN-NEXT: v_readlane_b32 s27, v1, 43 -; GCN-NEXT: v_readlane_b32 s28, v1, 44 -; GCN-NEXT: v_readlane_b32 s29, v1, 45 -; GCN-NEXT: v_readlane_b32 s30, v1, 46 -; GCN-NEXT: v_readlane_b32 s31, v1, 47 -; GCN-NEXT: v_readlane_b32 s36, v1, 32 -; GCN-NEXT: v_readlane_b32 s37, v1, 33 -; GCN-NEXT: v_readlane_b32 s38, v1, 34 -; GCN-NEXT: v_readlane_b32 s39, v1, 35 -; GCN-NEXT: v_readlane_b32 s40, v1, 36 -; GCN-NEXT: v_readlane_b32 s41, v1, 37 -; GCN-NEXT: v_readlane_b32 s42, v1, 38 -; GCN-NEXT: v_readlane_b32 s43, v1, 39 -; GCN-NEXT: v_readlane_b32 s44, v1, 24 -; GCN-NEXT: v_readlane_b32 s45, v1, 25 -; GCN-NEXT: v_readlane_b32 s46, v1, 26 -; GCN-NEXT: v_readlane_b32 s47, v1, 27 -; GCN-NEXT: v_readlane_b32 s48, v1, 28 -; GCN-NEXT: v_readlane_b32 s49, v1, 29 -; GCN-NEXT: v_readlane_b32 s50, v1, 30 -; GCN-NEXT: v_readlane_b32 s51, v1, 31 -; GCN-NEXT: v_readlane_b32 s52, v1, 16 -; GCN-NEXT: v_readlane_b32 s53, v1, 17 -; GCN-NEXT: v_readlane_b32 s54, v1, 18 -; GCN-NEXT: v_readlane_b32 s55, v1, 19 -; GCN-NEXT: v_readlane_b32 s56, v1, 20 -; GCN-NEXT: v_readlane_b32 s57, v1, 21 -; GCN-NEXT: v_readlane_b32 s58, v1, 22 -; GCN-NEXT: v_readlane_b32 s59, v1, 23 -; GCN-NEXT: v_readlane_b32 s60, v1, 8 -; GCN-NEXT: v_readlane_b32 s61, v1, 9 -; GCN-NEXT: v_readlane_b32 s62, v1, 10 -; GCN-NEXT: v_readlane_b32 s63, v1, 11 -; GCN-NEXT: v_readlane_b32 s64, v1, 12 -; GCN-NEXT: v_readlane_b32 s65, v1, 13 -; GCN-NEXT: v_readlane_b32 s66, v1, 14 -; GCN-NEXT: v_readlane_b32 s67, v1, 15 -; GCN-NEXT: v_readlane_b32 s68, v1, 0 -; GCN-NEXT: v_readlane_b32 s69, v1, 1 -; GCN-NEXT: v_readlane_b32 s70, v1, 2 -; GCN-NEXT: v_readlane_b32 s71, v1, 3 -; GCN-NEXT: v_readlane_b32 s72, v1, 4 -; GCN-NEXT: v_readlane_b32 s73, v1, 5 -; GCN-NEXT: v_readlane_b32 s74, v1, 6 -; GCN-NEXT: v_readlane_b32 s75, v1, 7 -; GCN-NEXT: v_readlane_b32 s76, v0, 56 -; GCN-NEXT: v_readlane_b32 
s77, v0, 57 -; GCN-NEXT: v_readlane_b32 s78, v0, 58 -; GCN-NEXT: v_readlane_b32 s79, v0, 59 -; GCN-NEXT: v_readlane_b32 s80, v0, 60 -; GCN-NEXT: v_readlane_b32 s81, v0, 61 -; GCN-NEXT: v_readlane_b32 s82, v0, 62 -; GCN-NEXT: v_readlane_b32 s83, v0, 63 -; GCN-NEXT: v_readlane_b32 s84, v0, 48 -; GCN-NEXT: v_readlane_b32 s85, v0, 49 -; GCN-NEXT: v_readlane_b32 s86, v0, 50 -; GCN-NEXT: v_readlane_b32 s87, v0, 51 -; GCN-NEXT: v_readlane_b32 s88, v0, 52 -; GCN-NEXT: v_readlane_b32 s89, v0, 53 -; GCN-NEXT: v_readlane_b32 s90, v0, 54 -; GCN-NEXT: v_readlane_b32 s91, v0, 55 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s8, v2, 56 +; GCN-NEXT: v_readlane_b32 s9, v2, 57 +; GCN-NEXT: v_readlane_b32 s10, v2, 58 +; GCN-NEXT: v_readlane_b32 s11, v2, 59 +; GCN-NEXT: v_readlane_b32 s12, v2, 60 +; GCN-NEXT: v_readlane_b32 s13, v2, 61 +; GCN-NEXT: v_readlane_b32 s14, v2, 62 +; GCN-NEXT: v_readlane_b32 s15, v2, 63 +; GCN-NEXT: v_readlane_b32 s16, v2, 48 +; GCN-NEXT: v_readlane_b32 s17, v2, 49 +; GCN-NEXT: v_readlane_b32 s18, v2, 50 +; GCN-NEXT: v_readlane_b32 s19, v2, 51 +; GCN-NEXT: v_readlane_b32 s20, v2, 52 +; GCN-NEXT: v_readlane_b32 s21, v2, 53 +; GCN-NEXT: v_readlane_b32 s22, v2, 54 +; GCN-NEXT: v_readlane_b32 s23, v2, 55 +; GCN-NEXT: v_readlane_b32 s24, v2, 40 +; GCN-NEXT: v_readlane_b32 s25, v2, 41 +; GCN-NEXT: v_readlane_b32 s26, v2, 42 +; GCN-NEXT: v_readlane_b32 s27, v2, 43 +; GCN-NEXT: v_readlane_b32 s28, v2, 44 +; GCN-NEXT: v_readlane_b32 s29, v2, 45 +; GCN-NEXT: v_readlane_b32 s30, v2, 46 +; GCN-NEXT: v_readlane_b32 s31, v2, 47 +; GCN-NEXT: v_readlane_b32 s36, v2, 32 +; GCN-NEXT: v_readlane_b32 s37, v2, 33 +; GCN-NEXT: v_readlane_b32 s38, v2, 34 +; GCN-NEXT: v_readlane_b32 s39, v2, 35 +; GCN-NEXT: v_readlane_b32 s40, v2, 36 +; GCN-NEXT: v_readlane_b32 s41, v2, 37 +; GCN-NEXT: v_readlane_b32 s42, v2, 38 +; GCN-NEXT: v_readlane_b32 s43, v2, 39 +; GCN-NEXT: v_readlane_b32 s44, v2, 24 +; GCN-NEXT: v_readlane_b32 s45, v2, 25 +; GCN-NEXT: v_readlane_b32 s46, v2, 26 +; GCN-NEXT: v_readlane_b32 s47, v2, 27 +; GCN-NEXT: v_readlane_b32 s48, v2, 28 +; GCN-NEXT: v_readlane_b32 s49, v2, 29 +; GCN-NEXT: v_readlane_b32 s50, v2, 30 +; GCN-NEXT: v_readlane_b32 s51, v2, 31 +; GCN-NEXT: v_readlane_b32 s52, v2, 16 +; GCN-NEXT: v_readlane_b32 s53, v2, 17 +; GCN-NEXT: v_readlane_b32 s54, v2, 18 +; GCN-NEXT: v_readlane_b32 s55, v2, 19 +; GCN-NEXT: v_readlane_b32 s56, v2, 20 +; GCN-NEXT: v_readlane_b32 s57, v2, 21 +; GCN-NEXT: v_readlane_b32 s58, v2, 22 +; GCN-NEXT: v_readlane_b32 s59, v2, 23 +; GCN-NEXT: v_readlane_b32 s60, v2, 8 +; GCN-NEXT: v_readlane_b32 s61, v2, 9 +; GCN-NEXT: v_readlane_b32 s62, v2, 10 +; GCN-NEXT: v_readlane_b32 s63, v2, 11 +; GCN-NEXT: v_readlane_b32 s64, v2, 12 +; GCN-NEXT: v_readlane_b32 
s65, v2, 13 +; GCN-NEXT: v_readlane_b32 s66, v2, 14 +; GCN-NEXT: v_readlane_b32 s67, v2, 15 +; GCN-NEXT: v_readlane_b32 s68, v2, 0 +; GCN-NEXT: v_readlane_b32 s69, v2, 1 +; GCN-NEXT: v_readlane_b32 s70, v2, 2 +; GCN-NEXT: v_readlane_b32 s71, v2, 3 +; GCN-NEXT: v_readlane_b32 s72, v2, 4 +; GCN-NEXT: v_readlane_b32 s73, v2, 5 +; GCN-NEXT: v_readlane_b32 s74, v2, 6 +; GCN-NEXT: v_readlane_b32 s75, v2, 7 +; GCN-NEXT: v_readlane_b32 s76, v1, 56 +; GCN-NEXT: v_readlane_b32 s77, v1, 57 +; GCN-NEXT: v_readlane_b32 s78, v1, 58 +; GCN-NEXT: v_readlane_b32 s79, v1, 59 +; GCN-NEXT: v_readlane_b32 s80, v1, 60 +; GCN-NEXT: v_readlane_b32 s81, v1, 61 +; GCN-NEXT: v_readlane_b32 s82, v1, 62 +; GCN-NEXT: v_readlane_b32 s83, v1, 63 +; GCN-NEXT: v_readlane_b32 s84, v1, 48 +; GCN-NEXT: v_readlane_b32 s85, v1, 49 +; GCN-NEXT: v_readlane_b32 s86, v1, 50 +; GCN-NEXT: v_readlane_b32 s87, v1, 51 +; GCN-NEXT: v_readlane_b32 s88, v1, 52 +; GCN-NEXT: v_readlane_b32 s89, v1, 53 +; GCN-NEXT: v_readlane_b32 s90, v1, 54 +; GCN-NEXT: v_readlane_b32 s91, v1, 55 +; GCN-NEXT: v_readlane_b32 s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: v_readlane_b32 s2, v1, 2 +; GCN-NEXT: v_readlane_b32 s3, v1, 3 +; GCN-NEXT: v_readlane_b32 s4, v1, 4 +; GCN-NEXT: v_readlane_b32 s5, v1, 5 +; GCN-NEXT: v_readlane_b32 s6, v1, 6 +; GCN-NEXT: v_readlane_b32 s7, v1, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 8 -; GCN-NEXT: v_readlane_b32 s1, v0, 9 -; GCN-NEXT: v_readlane_b32 s2, v0, 10 -; GCN-NEXT: v_readlane_b32 s3, v0, 11 -; GCN-NEXT: v_readlane_b32 s4, v0, 12 -; GCN-NEXT: v_readlane_b32 s5, v0, 13 -; GCN-NEXT: v_readlane_b32 s6, v0, 14 -; GCN-NEXT: v_readlane_b32 s7, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v1, 8 +; GCN-NEXT: v_readlane_b32 s1, v1, 9 +; GCN-NEXT: v_readlane_b32 s2, v1, 10 +; GCN-NEXT: v_readlane_b32 s3, v1, 11 +; GCN-NEXT: v_readlane_b32 s4, v1, 12 +; GCN-NEXT: v_readlane_b32 s5, v1, 13 +; GCN-NEXT: v_readlane_b32 s6, v1, 14 +; GCN-NEXT: v_readlane_b32 s7, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s0, v1, 16 +; GCN-NEXT: v_readlane_b32 s1, v1, 17 +; GCN-NEXT: v_readlane_b32 s2, v1, 18 +; GCN-NEXT: v_readlane_b32 s3, v1, 19 +; GCN-NEXT: v_readlane_b32 s4, v1, 20 +; GCN-NEXT: v_readlane_b32 s5, v1, 21 +; GCN-NEXT: v_readlane_b32 s6, v1, 22 +; GCN-NEXT: v_readlane_b32 s7, v1, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 24 -; GCN-NEXT: v_readlane_b32 s1, v0, 25 -; GCN-NEXT: v_readlane_b32 s2, v0, 26 -; GCN-NEXT: v_readlane_b32 s3, v0, 27 -; GCN-NEXT: v_readlane_b32 s4, v0, 28 -; GCN-NEXT: v_readlane_b32 s5, v0, 29 -; GCN-NEXT: v_readlane_b32 s6, v0, 30 -; GCN-NEXT: v_readlane_b32 s7, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v1, 24 +; GCN-NEXT: v_readlane_b32 s1, v1, 25 +; GCN-NEXT: v_readlane_b32 s2, v1, 26 +; GCN-NEXT: v_readlane_b32 s3, v1, 27 +; GCN-NEXT: v_readlane_b32 s4, v1, 28 +; GCN-NEXT: v_readlane_b32 s5, v1, 29 +; GCN-NEXT: v_readlane_b32 s6, v1, 30 +; GCN-NEXT: v_readlane_b32 s7, v1, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 32 -; GCN-NEXT: 
v_readlane_b32 s1, v0, 33 -; GCN-NEXT: v_readlane_b32 s2, v0, 34 -; GCN-NEXT: v_readlane_b32 s3, v0, 35 -; GCN-NEXT: v_readlane_b32 s4, v0, 36 -; GCN-NEXT: v_readlane_b32 s5, v0, 37 -; GCN-NEXT: v_readlane_b32 s6, v0, 38 -; GCN-NEXT: v_readlane_b32 s7, v0, 39 +; GCN-NEXT: v_readlane_b32 s0, v1, 32 +; GCN-NEXT: v_readlane_b32 s1, v1, 33 +; GCN-NEXT: v_readlane_b32 s2, v1, 34 +; GCN-NEXT: v_readlane_b32 s3, v1, 35 +; GCN-NEXT: v_readlane_b32 s4, v1, 36 +; GCN-NEXT: v_readlane_b32 s5, v1, 37 +; GCN-NEXT: v_readlane_b32 s6, v1, 38 +; GCN-NEXT: v_readlane_b32 s7, v1, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 40 -; GCN-NEXT: v_readlane_b32 s1, v0, 41 -; GCN-NEXT: v_readlane_b32 s2, v0, 42 -; GCN-NEXT: v_readlane_b32 s3, v0, 43 -; GCN-NEXT: v_readlane_b32 s4, v0, 44 -; GCN-NEXT: v_readlane_b32 s5, v0, 45 -; GCN-NEXT: v_readlane_b32 s6, v0, 46 -; GCN-NEXT: v_readlane_b32 s7, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v1, 40 +; GCN-NEXT: v_readlane_b32 s1, v1, 41 +; GCN-NEXT: v_readlane_b32 s2, v1, 42 +; GCN-NEXT: v_readlane_b32 s3, v1, 43 +; GCN-NEXT: v_readlane_b32 s4, v1, 44 +; GCN-NEXT: v_readlane_b32 s5, v1, 45 +; GCN-NEXT: v_readlane_b32 s6, v1, 46 +; GCN-NEXT: v_readlane_b32 s7, v1, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 0 -; GCN-NEXT: v_readlane_b32 s1, v2, 1 -; GCN-NEXT: v_readlane_b32 s2, v2, 2 -; GCN-NEXT: v_readlane_b32 s3, v2, 3 -; GCN-NEXT: v_readlane_b32 s4, v2, 4 -; GCN-NEXT: v_readlane_b32 s5, v2, 5 -; GCN-NEXT: v_readlane_b32 s6, v2, 6 -; GCN-NEXT: v_readlane_b32 s7, v2, 7 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[84:91] ; GCN-NEXT: ;;#ASMEND @@ -392,6 +420,18 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB0_2: ; %ret +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: ; kill: killed $vgpr2 +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -442,104 +482,125 @@ ret: define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: split_sgpr_spill_2_vgprs: ; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s54, -1 +; GCN-NEXT: s_mov_b32 s55, 0xe8f000 +; GCN-NEXT: s_add_u32 s52, s52, s3 +; GCN-NEXT: s_addc_u32 s53, s53, 0 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: ; implicit-def: $vgpr1 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; 
GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 -; GCN-NEXT: v_writelane_b32 v0, s12, 8 -; GCN-NEXT: v_writelane_b32 v0, s13, 9 -; GCN-NEXT: v_writelane_b32 v0, s14, 10 -; GCN-NEXT: v_writelane_b32 v0, s15, 11 -; GCN-NEXT: v_writelane_b32 v0, s16, 12 -; GCN-NEXT: v_writelane_b32 v0, s17, 13 -; GCN-NEXT: v_writelane_b32 v0, s18, 14 -; GCN-NEXT: v_writelane_b32 v0, s19, 15 +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: v_writelane_b32 v1, s12, 8 +; GCN-NEXT: v_writelane_b32 v1, s13, 9 +; GCN-NEXT: v_writelane_b32 v1, s14, 10 +; GCN-NEXT: v_writelane_b32 v1, s15, 11 +; GCN-NEXT: v_writelane_b32 v1, s16, 12 +; GCN-NEXT: v_writelane_b32 v1, s17, 13 +; GCN-NEXT: v_writelane_b32 v1, s18, 14 +; GCN-NEXT: v_writelane_b32 v1, s19, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 -; GCN-NEXT: v_writelane_b32 v0, s12, 24 -; GCN-NEXT: v_writelane_b32 v0, s13, 25 -; GCN-NEXT: v_writelane_b32 v0, s14, 26 -; GCN-NEXT: v_writelane_b32 v0, s15, 27 -; GCN-NEXT: v_writelane_b32 v0, s16, 28 -; GCN-NEXT: v_writelane_b32 v0, s17, 29 -; GCN-NEXT: v_writelane_b32 v0, s18, 30 -; GCN-NEXT: v_writelane_b32 v0, s19, 31 +; GCN-NEXT: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: v_writelane_b32 v1, s12, 24 +; GCN-NEXT: v_writelane_b32 v1, s13, 25 +; GCN-NEXT: v_writelane_b32 v1, s14, 26 +; GCN-NEXT: v_writelane_b32 v1, s15, 27 +; GCN-NEXT: v_writelane_b32 v1, s16, 28 +; GCN-NEXT: v_writelane_b32 v1, s17, 29 +; GCN-NEXT: v_writelane_b32 v1, s18, 30 +; GCN-NEXT: v_writelane_b32 v1, s19, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 32 -; GCN-NEXT: v_writelane_b32 v0, s5, 33 -; GCN-NEXT: v_writelane_b32 v0, s6, 34 -; GCN-NEXT: v_writelane_b32 v0, s7, 35 -; GCN-NEXT: v_writelane_b32 v0, s8, 36 -; GCN-NEXT: v_writelane_b32 v0, s9, 37 -; GCN-NEXT: v_writelane_b32 v0, s10, 38 -; GCN-NEXT: v_writelane_b32 v0, s11, 39 -; GCN-NEXT: v_writelane_b32 v0, s12, 40 -; GCN-NEXT: v_writelane_b32 v0, s13, 41 -; GCN-NEXT: v_writelane_b32 v0, s14, 42 -; GCN-NEXT: v_writelane_b32 v0, s15, 43 -; GCN-NEXT: v_writelane_b32 v0, s16, 44 -; GCN-NEXT: v_writelane_b32 v0, s17, 45 -; GCN-NEXT: v_writelane_b32 v0, s18, 46 -; GCN-NEXT: v_writelane_b32 v0, s19, 47 +; GCN-NEXT: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: 
v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: v_writelane_b32 v1, s12, 40 +; GCN-NEXT: v_writelane_b32 v1, s13, 41 +; GCN-NEXT: v_writelane_b32 v1, s14, 42 +; GCN-NEXT: v_writelane_b32 v1, s15, 43 +; GCN-NEXT: v_writelane_b32 v1, s16, 44 +; GCN-NEXT: v_writelane_b32 v1, s17, 45 +; GCN-NEXT: v_writelane_b32 v1, s18, 46 +; GCN-NEXT: v_writelane_b32 v1, s19, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 -; GCN-NEXT: v_writelane_b32 v0, s12, 56 -; GCN-NEXT: v_writelane_b32 v0, s13, 57 -; GCN-NEXT: v_writelane_b32 v0, s14, 58 -; GCN-NEXT: v_writelane_b32 v0, s15, 59 -; GCN-NEXT: v_writelane_b32 v0, s16, 60 -; GCN-NEXT: v_writelane_b32 v0, s17, 61 -; GCN-NEXT: v_writelane_b32 v0, s18, 62 -; GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 +; GCN-NEXT: v_writelane_b32 v1, s12, 56 +; GCN-NEXT: v_writelane_b32 v1, s13, 57 +; GCN-NEXT: v_writelane_b32 v1, s14, 58 +; GCN-NEXT: v_writelane_b32 v1, s15, 59 +; GCN-NEXT: v_writelane_b32 v1, s16, 60 +; GCN-NEXT: v_writelane_b32 v1, s17, 61 +; GCN-NEXT: v_writelane_b32 v1, s18, 62 +; GCN-NEXT: v_writelane_b32 v1, s19, 63 +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 0 -; GCN-NEXT: v_writelane_b32 v1, s5, 1 -; GCN-NEXT: v_writelane_b32 v1, s6, 2 -; GCN-NEXT: v_writelane_b32 v1, s7, 3 -; GCN-NEXT: v_writelane_b32 v1, s8, 4 -; GCN-NEXT: v_writelane_b32 v1, s9, 5 -; GCN-NEXT: v_writelane_b32 v1, s10, 6 -; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s2, 8 -; GCN-NEXT: v_writelane_b32 v1, s3, 9 +; GCN-NEXT: v_writelane_b32 v0, s2, 8 +; GCN-NEXT: v_writelane_b32 v0, s3, 9 +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 .LBB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded 
Reload +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s16, v1, 8 ; GCN-NEXT: v_readlane_b32 s17, v1, 9 ; GCN-NEXT: v_readlane_b32 s20, v1, 0 @@ -633,6 +694,14 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB1_2: ; %ret +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -669,9 +738,17 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000 ; GCN-NEXT: s_add_u32 s52, s52, s3 ; GCN-NEXT: s_addc_u32 s53, s53, 0 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -685,176 +762,176 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 0 -; GCN-NEXT: v_writelane_b32 v31, s5, 1 -; GCN-NEXT: v_writelane_b32 v31, s6, 2 -; GCN-NEXT: v_writelane_b32 v31, s7, 3 -; GCN-NEXT: v_writelane_b32 v31, s8, 4 -; GCN-NEXT: v_writelane_b32 v31, s9, 5 -; GCN-NEXT: v_writelane_b32 v31, s10, 6 -; GCN-NEXT: v_writelane_b32 v31, s11, 7 -; GCN-NEXT: v_writelane_b32 v31, s12, 8 -; GCN-NEXT: v_writelane_b32 v31, s13, 9 -; GCN-NEXT: v_writelane_b32 v31, s14, 10 -; GCN-NEXT: v_writelane_b32 v31, s15, 11 -; GCN-NEXT: v_writelane_b32 v31, s16, 12 -; GCN-NEXT: v_writelane_b32 v31, s17, 13 -; GCN-NEXT: v_writelane_b32 v31, s18, 14 -; GCN-NEXT: v_writelane_b32 v31, s19, 15 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: v_writelane_b32 v1, s12, 8 +; GCN-NEXT: v_writelane_b32 v1, s13, 9 +; GCN-NEXT: v_writelane_b32 v1, s14, 10 +; GCN-NEXT: v_writelane_b32 v1, s15, 11 +; GCN-NEXT: v_writelane_b32 v1, s16, 12 +; GCN-NEXT: v_writelane_b32 v1, s17, 13 +; GCN-NEXT: v_writelane_b32 v1, s18, 14 +; GCN-NEXT: v_writelane_b32 v1, s19, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 16 -; GCN-NEXT: v_writelane_b32 v31, s5, 17 -; GCN-NEXT: v_writelane_b32 v31, s6, 18 -; GCN-NEXT: v_writelane_b32 v31, s7, 19 -; GCN-NEXT: v_writelane_b32 v31, s8, 20 -; GCN-NEXT: v_writelane_b32 v31, s9, 21 -; GCN-NEXT: v_writelane_b32 v31, 
s10, 22 -; GCN-NEXT: v_writelane_b32 v31, s11, 23 -; GCN-NEXT: v_writelane_b32 v31, s12, 24 -; GCN-NEXT: v_writelane_b32 v31, s13, 25 -; GCN-NEXT: v_writelane_b32 v31, s14, 26 -; GCN-NEXT: v_writelane_b32 v31, s15, 27 -; GCN-NEXT: v_writelane_b32 v31, s16, 28 -; GCN-NEXT: v_writelane_b32 v31, s17, 29 -; GCN-NEXT: v_writelane_b32 v31, s18, 30 -; GCN-NEXT: v_writelane_b32 v31, s19, 31 +; GCN-NEXT: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: v_writelane_b32 v1, s12, 24 +; GCN-NEXT: v_writelane_b32 v1, s13, 25 +; GCN-NEXT: v_writelane_b32 v1, s14, 26 +; GCN-NEXT: v_writelane_b32 v1, s15, 27 +; GCN-NEXT: v_writelane_b32 v1, s16, 28 +; GCN-NEXT: v_writelane_b32 v1, s17, 29 +; GCN-NEXT: v_writelane_b32 v1, s18, 30 +; GCN-NEXT: v_writelane_b32 v1, s19, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 32 -; GCN-NEXT: v_writelane_b32 v31, s5, 33 -; GCN-NEXT: v_writelane_b32 v31, s6, 34 -; GCN-NEXT: v_writelane_b32 v31, s7, 35 -; GCN-NEXT: v_writelane_b32 v31, s8, 36 -; GCN-NEXT: v_writelane_b32 v31, s9, 37 -; GCN-NEXT: v_writelane_b32 v31, s10, 38 -; GCN-NEXT: v_writelane_b32 v31, s11, 39 -; GCN-NEXT: v_writelane_b32 v31, s12, 40 -; GCN-NEXT: v_writelane_b32 v31, s13, 41 -; GCN-NEXT: v_writelane_b32 v31, s14, 42 -; GCN-NEXT: v_writelane_b32 v31, s15, 43 -; GCN-NEXT: v_writelane_b32 v31, s16, 44 -; GCN-NEXT: v_writelane_b32 v31, s17, 45 -; GCN-NEXT: v_writelane_b32 v31, s18, 46 -; GCN-NEXT: v_writelane_b32 v31, s19, 47 +; GCN-NEXT: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: v_writelane_b32 v1, s12, 40 +; GCN-NEXT: v_writelane_b32 v1, s13, 41 +; GCN-NEXT: v_writelane_b32 v1, s14, 42 +; GCN-NEXT: v_writelane_b32 v1, s15, 43 +; GCN-NEXT: v_writelane_b32 v1, s16, 44 +; GCN-NEXT: v_writelane_b32 v1, s17, 45 +; GCN-NEXT: v_writelane_b32 v1, s18, 46 +; GCN-NEXT: v_writelane_b32 v1, s19, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 48 -; GCN-NEXT: v_writelane_b32 v31, s5, 49 -; GCN-NEXT: v_writelane_b32 v31, s6, 50 -; GCN-NEXT: v_writelane_b32 v31, s7, 51 -; GCN-NEXT: v_writelane_b32 v31, s8, 52 -; GCN-NEXT: v_writelane_b32 v31, s9, 53 -; GCN-NEXT: v_writelane_b32 v31, s10, 54 -; GCN-NEXT: v_writelane_b32 v31, s11, 55 -; GCN-NEXT: v_writelane_b32 v31, s12, 56 -; GCN-NEXT: v_writelane_b32 v31, s13, 57 -; GCN-NEXT: v_writelane_b32 v31, s14, 58 -; GCN-NEXT: v_writelane_b32 v31, s15, 59 -; GCN-NEXT: v_writelane_b32 v31, s16, 60 -; GCN-NEXT: v_writelane_b32 v31, s17, 61 -; GCN-NEXT: v_writelane_b32 v31, s18, 62 -; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 +; GCN-NEXT: v_writelane_b32 v1, s12, 56 +; GCN-NEXT: v_writelane_b32 
v1, s13, 57 +; GCN-NEXT: v_writelane_b32 v1, s14, 58 +; GCN-NEXT: v_writelane_b32 v1, s15, 59 +; GCN-NEXT: v_writelane_b32 v1, s16, 60 +; GCN-NEXT: v_writelane_b32 v1, s17, 61 +; GCN-NEXT: v_writelane_b32 v1, s18, 62 +; GCN-NEXT: v_writelane_b32 v1, s19, 63 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_writelane_b32 v0, s2, 0 ; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 .LBB2_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s36, v31, 32 -; GCN-NEXT: v_readlane_b32 s37, v31, 33 -; GCN-NEXT: v_readlane_b32 s38, v31, 34 -; GCN-NEXT: v_readlane_b32 s39, v31, 35 -; GCN-NEXT: v_readlane_b32 s40, v31, 36 -; GCN-NEXT: v_readlane_b32 s41, v31, 37 -; GCN-NEXT: v_readlane_b32 s42, v31, 38 -; GCN-NEXT: v_readlane_b32 s43, v31, 39 -; GCN-NEXT: v_readlane_b32 s44, v31, 40 -; GCN-NEXT: v_readlane_b32 s45, v31, 41 -; GCN-NEXT: v_readlane_b32 s46, v31, 42 -; GCN-NEXT: v_readlane_b32 s47, v31, 43 -; GCN-NEXT: v_readlane_b32 s48, v31, 44 -; GCN-NEXT: v_readlane_b32 s49, v31, 45 -; GCN-NEXT: v_readlane_b32 s50, v31, 46 -; GCN-NEXT: v_readlane_b32 s51, v31, 47 -; GCN-NEXT: v_readlane_b32 s0, v31, 16 -; GCN-NEXT: v_readlane_b32 s1, v31, 17 -; GCN-NEXT: v_readlane_b32 s2, v31, 18 -; GCN-NEXT: v_readlane_b32 s3, v31, 19 -; GCN-NEXT: v_readlane_b32 s4, v31, 20 -; GCN-NEXT: v_readlane_b32 s5, v31, 21 -; GCN-NEXT: v_readlane_b32 s6, v31, 22 -; GCN-NEXT: v_readlane_b32 s7, v31, 23 -; GCN-NEXT: v_readlane_b32 s8, v31, 24 -; GCN-NEXT: v_readlane_b32 s9, v31, 25 -; GCN-NEXT: v_readlane_b32 s10, v31, 26 -; GCN-NEXT: v_readlane_b32 s11, v31, 27 -; GCN-NEXT: v_readlane_b32 s12, v31, 28 -; GCN-NEXT: v_readlane_b32 s13, v31, 29 -; GCN-NEXT: v_readlane_b32 s14, v31, 30 -; GCN-NEXT: v_readlane_b32 s15, v31, 31 -; GCN-NEXT: v_readlane_b32 s16, v31, 0 -; GCN-NEXT: v_readlane_b32 s17, v31, 1 -; GCN-NEXT: v_readlane_b32 s18, v31, 2 -; GCN-NEXT: v_readlane_b32 s19, v31, 3 -; GCN-NEXT: v_readlane_b32 s20, v31, 4 -; GCN-NEXT: v_readlane_b32 s21, v31, 5 -; GCN-NEXT: v_readlane_b32 s22, v31, 6 -; GCN-NEXT: v_readlane_b32 s23, v31, 7 -; GCN-NEXT: v_readlane_b32 s24, v31, 8 -; GCN-NEXT: v_readlane_b32 s25, v31, 9 -; GCN-NEXT: v_readlane_b32 s26, v31, 10 -; GCN-NEXT: v_readlane_b32 s27, v31, 11 -; GCN-NEXT: v_readlane_b32 s28, v31, 12 -; GCN-NEXT: v_readlane_b32 s29, v31, 13 -; GCN-NEXT: v_readlane_b32 s30, v31, 14 -; GCN-NEXT: v_readlane_b32 s31, v31, 15 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s36, v1, 32 +; GCN-NEXT: v_readlane_b32 s37, v1, 
33 +; GCN-NEXT: v_readlane_b32 s38, v1, 34 +; GCN-NEXT: v_readlane_b32 s39, v1, 35 +; GCN-NEXT: v_readlane_b32 s40, v1, 36 +; GCN-NEXT: v_readlane_b32 s41, v1, 37 +; GCN-NEXT: v_readlane_b32 s42, v1, 38 +; GCN-NEXT: v_readlane_b32 s43, v1, 39 +; GCN-NEXT: v_readlane_b32 s44, v1, 40 +; GCN-NEXT: v_readlane_b32 s45, v1, 41 +; GCN-NEXT: v_readlane_b32 s46, v1, 42 +; GCN-NEXT: v_readlane_b32 s47, v1, 43 +; GCN-NEXT: v_readlane_b32 s48, v1, 44 +; GCN-NEXT: v_readlane_b32 s49, v1, 45 +; GCN-NEXT: v_readlane_b32 s50, v1, 46 +; GCN-NEXT: v_readlane_b32 s51, v1, 47 +; GCN-NEXT: v_readlane_b32 s0, v1, 16 +; GCN-NEXT: v_readlane_b32 s1, v1, 17 +; GCN-NEXT: v_readlane_b32 s2, v1, 18 +; GCN-NEXT: v_readlane_b32 s3, v1, 19 +; GCN-NEXT: v_readlane_b32 s4, v1, 20 +; GCN-NEXT: v_readlane_b32 s5, v1, 21 +; GCN-NEXT: v_readlane_b32 s6, v1, 22 +; GCN-NEXT: v_readlane_b32 s7, v1, 23 +; GCN-NEXT: v_readlane_b32 s8, v1, 24 +; GCN-NEXT: v_readlane_b32 s9, v1, 25 +; GCN-NEXT: v_readlane_b32 s10, v1, 26 +; GCN-NEXT: v_readlane_b32 s11, v1, 27 +; GCN-NEXT: v_readlane_b32 s12, v1, 28 +; GCN-NEXT: v_readlane_b32 s13, v1, 29 +; GCN-NEXT: v_readlane_b32 s14, v1, 30 +; GCN-NEXT: v_readlane_b32 s15, v1, 31 +; GCN-NEXT: v_readlane_b32 s16, v1, 0 +; GCN-NEXT: v_readlane_b32 s17, v1, 1 +; GCN-NEXT: v_readlane_b32 s18, v1, 2 +; GCN-NEXT: v_readlane_b32 s19, v1, 3 +; GCN-NEXT: v_readlane_b32 s20, v1, 4 +; GCN-NEXT: v_readlane_b32 s21, v1, 5 +; GCN-NEXT: v_readlane_b32 s22, v1, 6 +; GCN-NEXT: v_readlane_b32 s23, v1, 7 +; GCN-NEXT: v_readlane_b32 s24, v1, 8 +; GCN-NEXT: v_readlane_b32 s25, v1, 9 +; GCN-NEXT: v_readlane_b32 s26, v1, 10 +; GCN-NEXT: v_readlane_b32 s27, v1, 11 +; GCN-NEXT: v_readlane_b32 s28, v1, 12 +; GCN-NEXT: v_readlane_b32 s29, v1, 13 +; GCN-NEXT: v_readlane_b32 s30, v1, 14 +; GCN-NEXT: v_readlane_b32 s31, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[16:31] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v31, 48 -; GCN-NEXT: v_readlane_b32 s5, v31, 49 -; GCN-NEXT: v_readlane_b32 s6, v31, 50 -; GCN-NEXT: v_readlane_b32 s7, v31, 51 -; GCN-NEXT: v_readlane_b32 s8, v31, 52 -; GCN-NEXT: v_readlane_b32 s9, v31, 53 -; GCN-NEXT: v_readlane_b32 s10, v31, 54 -; GCN-NEXT: v_readlane_b32 s11, v31, 55 -; GCN-NEXT: v_readlane_b32 s12, v31, 56 -; GCN-NEXT: v_readlane_b32 s13, v31, 57 -; GCN-NEXT: v_readlane_b32 s14, v31, 58 -; GCN-NEXT: v_readlane_b32 s15, v31, 59 -; GCN-NEXT: v_readlane_b32 s16, v31, 60 -; GCN-NEXT: v_readlane_b32 s17, v31, 61 -; GCN-NEXT: v_readlane_b32 s18, v31, 62 -; GCN-NEXT: v_readlane_b32 s19, v31, 63 -; GCN-NEXT: s_mov_b64 s[2:3], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s4, v1, 48 +; GCN-NEXT: v_readlane_b32 s5, v1, 49 +; GCN-NEXT: v_readlane_b32 s6, v1, 50 +; GCN-NEXT: v_readlane_b32 s7, v1, 51 +; GCN-NEXT: v_readlane_b32 s8, v1, 52 +; GCN-NEXT: v_readlane_b32 s9, v1, 53 +; GCN-NEXT: v_readlane_b32 s10, v1, 54 +; GCN-NEXT: v_readlane_b32 s11, v1, 55 +; GCN-NEXT: v_readlane_b32 s12, v1, 56 +; GCN-NEXT: v_readlane_b32 s13, v1, 57 +; GCN-NEXT: v_readlane_b32 s14, v1, 58 +; GCN-NEXT: v_readlane_b32 s15, v1, 59 +; GCN-NEXT: v_readlane_b32 s16, v1, 60 +; GCN-NEXT: v_readlane_b32 s17, v1, 61 +; GCN-NEXT: v_readlane_b32 s18, v1, 62 +; GCN-NEXT: v_readlane_b32 s19, v1, 63 ; GCN-NEXT: v_readlane_b32 s0, v0, 0 ; GCN-NEXT: v_readlane_b32 
s1, v0, 1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[2:3] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -865,6 +942,14 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 ; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB2_2: ; %ret +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 @@ -904,9 +989,17 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: s_mov_b32 s55, 0xe8f000 ; GCN-NEXT: s_add_u32 s52, s52, s3 ; GCN-NEXT: s_addc_u32 s53, s53, 0 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -920,144 +1013,152 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 0 -; GCN-NEXT: v_writelane_b32 v31, s5, 1 -; GCN-NEXT: v_writelane_b32 v31, s6, 2 -; GCN-NEXT: v_writelane_b32 v31, s7, 3 -; GCN-NEXT: v_writelane_b32 v31, s8, 4 -; GCN-NEXT: v_writelane_b32 v31, s9, 5 -; GCN-NEXT: v_writelane_b32 v31, s10, 6 -; GCN-NEXT: v_writelane_b32 v31, s11, 7 -; GCN-NEXT: v_writelane_b32 v31, s12, 8 -; GCN-NEXT: v_writelane_b32 v31, s13, 9 -; GCN-NEXT: v_writelane_b32 v31, s14, 10 -; GCN-NEXT: v_writelane_b32 v31, s15, 11 -; GCN-NEXT: v_writelane_b32 v31, s16, 12 -; GCN-NEXT: v_writelane_b32 v31, s17, 13 -; GCN-NEXT: v_writelane_b32 v31, s18, 14 -; GCN-NEXT: v_writelane_b32 v31, s19, 15 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: v_writelane_b32 v1, s12, 8 +; GCN-NEXT: v_writelane_b32 v1, s13, 9 +; GCN-NEXT: v_writelane_b32 v1, s14, 10 +; GCN-NEXT: v_writelane_b32 v1, s15, 11 +; GCN-NEXT: v_writelane_b32 v1, s16, 12 +; GCN-NEXT: v_writelane_b32 v1, s17, 13 +; GCN-NEXT: v_writelane_b32 v1, s18, 14 +; GCN-NEXT: v_writelane_b32 v1, s19, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 16 -; GCN-NEXT: v_writelane_b32 v31, s5, 17 -; GCN-NEXT: v_writelane_b32 v31, s6, 18 -; GCN-NEXT: v_writelane_b32 v31, s7, 19 -; GCN-NEXT: v_writelane_b32 v31, s8, 20 -; GCN-NEXT: v_writelane_b32 v31, s9, 21 -; GCN-NEXT: v_writelane_b32 v31, s10, 22 -; GCN-NEXT: v_writelane_b32 v31, 
s11, 23 -; GCN-NEXT: v_writelane_b32 v31, s12, 24 -; GCN-NEXT: v_writelane_b32 v31, s13, 25 -; GCN-NEXT: v_writelane_b32 v31, s14, 26 -; GCN-NEXT: v_writelane_b32 v31, s15, 27 -; GCN-NEXT: v_writelane_b32 v31, s16, 28 -; GCN-NEXT: v_writelane_b32 v31, s17, 29 -; GCN-NEXT: v_writelane_b32 v31, s18, 30 -; GCN-NEXT: v_writelane_b32 v31, s19, 31 +; GCN-NEXT: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: v_writelane_b32 v1, s12, 24 +; GCN-NEXT: v_writelane_b32 v1, s13, 25 +; GCN-NEXT: v_writelane_b32 v1, s14, 26 +; GCN-NEXT: v_writelane_b32 v1, s15, 27 +; GCN-NEXT: v_writelane_b32 v1, s16, 28 +; GCN-NEXT: v_writelane_b32 v1, s17, 29 +; GCN-NEXT: v_writelane_b32 v1, s18, 30 +; GCN-NEXT: v_writelane_b32 v1, s19, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 32 -; GCN-NEXT: v_writelane_b32 v31, s5, 33 -; GCN-NEXT: v_writelane_b32 v31, s6, 34 -; GCN-NEXT: v_writelane_b32 v31, s7, 35 -; GCN-NEXT: v_writelane_b32 v31, s8, 36 -; GCN-NEXT: v_writelane_b32 v31, s9, 37 -; GCN-NEXT: v_writelane_b32 v31, s10, 38 -; GCN-NEXT: v_writelane_b32 v31, s11, 39 -; GCN-NEXT: v_writelane_b32 v31, s12, 40 -; GCN-NEXT: v_writelane_b32 v31, s13, 41 -; GCN-NEXT: v_writelane_b32 v31, s14, 42 -; GCN-NEXT: v_writelane_b32 v31, s15, 43 -; GCN-NEXT: v_writelane_b32 v31, s16, 44 -; GCN-NEXT: v_writelane_b32 v31, s17, 45 -; GCN-NEXT: v_writelane_b32 v31, s18, 46 -; GCN-NEXT: v_writelane_b32 v31, s19, 47 +; GCN-NEXT: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: v_writelane_b32 v1, s12, 40 +; GCN-NEXT: v_writelane_b32 v1, s13, 41 +; GCN-NEXT: v_writelane_b32 v1, s14, 42 +; GCN-NEXT: v_writelane_b32 v1, s15, 43 +; GCN-NEXT: v_writelane_b32 v1, s16, 44 +; GCN-NEXT: v_writelane_b32 v1, s17, 45 +; GCN-NEXT: v_writelane_b32 v1, s18, 46 +; GCN-NEXT: v_writelane_b32 v1, s19, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 48 -; GCN-NEXT: v_writelane_b32 v31, s5, 49 -; GCN-NEXT: v_writelane_b32 v31, s6, 50 -; GCN-NEXT: v_writelane_b32 v31, s7, 51 -; GCN-NEXT: v_writelane_b32 v31, s8, 52 -; GCN-NEXT: v_writelane_b32 v31, s9, 53 -; GCN-NEXT: v_writelane_b32 v31, s10, 54 -; GCN-NEXT: v_writelane_b32 v31, s11, 55 -; GCN-NEXT: v_writelane_b32 v31, s12, 56 -; GCN-NEXT: v_writelane_b32 v31, s13, 57 -; GCN-NEXT: v_writelane_b32 v31, s14, 58 -; GCN-NEXT: v_writelane_b32 v31, s15, 59 -; GCN-NEXT: v_writelane_b32 v31, s16, 60 -; GCN-NEXT: v_writelane_b32 v31, s17, 61 -; GCN-NEXT: v_writelane_b32 v31, s18, 62 -; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 +; GCN-NEXT: v_writelane_b32 v1, s12, 56 +; GCN-NEXT: v_writelane_b32 v1, s13, 57 +; GCN-NEXT: v_writelane_b32 
v1, s14, 58 +; GCN-NEXT: v_writelane_b32 v1, s15, 59 +; GCN-NEXT: v_writelane_b32 v1, s16, 60 +; GCN-NEXT: v_writelane_b32 v1, s17, 61 +; GCN-NEXT: v_writelane_b32 v1, s18, 62 +; GCN-NEXT: v_writelane_b32 v1, s19, 63 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 +; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_writelane_b32 v0, s2, 0 ; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 .LBB3_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s36, v31, 32 -; GCN-NEXT: v_readlane_b32 s37, v31, 33 -; GCN-NEXT: v_readlane_b32 s38, v31, 34 -; GCN-NEXT: v_readlane_b32 s39, v31, 35 -; GCN-NEXT: v_readlane_b32 s40, v31, 36 -; GCN-NEXT: v_readlane_b32 s41, v31, 37 -; GCN-NEXT: v_readlane_b32 s42, v31, 38 -; GCN-NEXT: v_readlane_b32 s43, v31, 39 -; GCN-NEXT: v_readlane_b32 s44, v31, 40 -; GCN-NEXT: v_readlane_b32 s45, v31, 41 -; GCN-NEXT: v_readlane_b32 s46, v31, 42 -; GCN-NEXT: v_readlane_b32 s47, v31, 43 -; GCN-NEXT: v_readlane_b32 s48, v31, 44 -; GCN-NEXT: v_readlane_b32 s49, v31, 45 -; GCN-NEXT: v_readlane_b32 s50, v31, 46 -; GCN-NEXT: v_readlane_b32 s51, v31, 47 -; GCN-NEXT: v_readlane_b32 s0, v31, 16 -; GCN-NEXT: v_readlane_b32 s1, v31, 17 -; GCN-NEXT: v_readlane_b32 s2, v31, 18 -; GCN-NEXT: v_readlane_b32 s3, v31, 19 -; GCN-NEXT: v_readlane_b32 s4, v31, 20 -; GCN-NEXT: v_readlane_b32 s5, v31, 21 -; GCN-NEXT: v_readlane_b32 s6, v31, 22 -; GCN-NEXT: v_readlane_b32 s7, v31, 23 -; GCN-NEXT: v_readlane_b32 s8, v31, 24 -; GCN-NEXT: v_readlane_b32 s9, v31, 25 -; GCN-NEXT: v_readlane_b32 s10, v31, 26 -; GCN-NEXT: v_readlane_b32 s11, v31, 27 -; GCN-NEXT: v_readlane_b32 s12, v31, 28 -; GCN-NEXT: v_readlane_b32 s13, v31, 29 -; GCN-NEXT: v_readlane_b32 s14, v31, 30 -; GCN-NEXT: v_readlane_b32 s15, v31, 31 -; GCN-NEXT: v_readlane_b32 s16, v31, 0 -; GCN-NEXT: v_readlane_b32 s17, v31, 1 -; GCN-NEXT: v_readlane_b32 s18, v31, 2 -; GCN-NEXT: v_readlane_b32 s19, v31, 3 -; GCN-NEXT: v_readlane_b32 s20, v31, 4 -; GCN-NEXT: v_readlane_b32 s21, v31, 5 -; GCN-NEXT: v_readlane_b32 s22, v31, 6 -; GCN-NEXT: v_readlane_b32 s23, v31, 7 -; GCN-NEXT: v_readlane_b32 s24, v31, 8 -; GCN-NEXT: v_readlane_b32 s25, v31, 9 -; GCN-NEXT: v_readlane_b32 s26, v31, 10 -; GCN-NEXT: v_readlane_b32 s27, v31, 11 -; GCN-NEXT: v_readlane_b32 s28, v31, 12 -; GCN-NEXT: v_readlane_b32 s29, v31, 13 -; GCN-NEXT: v_readlane_b32 s30, v31, 14 -; GCN-NEXT: v_readlane_b32 s31, v31, 15 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s36, v2, 32 +; GCN-NEXT: v_readlane_b32 s37, v2, 33 +; GCN-NEXT: v_readlane_b32 s38, v2, 34 
+; GCN-NEXT: v_readlane_b32 s39, v2, 35 +; GCN-NEXT: v_readlane_b32 s40, v2, 36 +; GCN-NEXT: v_readlane_b32 s41, v2, 37 +; GCN-NEXT: v_readlane_b32 s42, v2, 38 +; GCN-NEXT: v_readlane_b32 s43, v2, 39 +; GCN-NEXT: v_readlane_b32 s44, v2, 40 +; GCN-NEXT: v_readlane_b32 s45, v2, 41 +; GCN-NEXT: v_readlane_b32 s46, v2, 42 +; GCN-NEXT: v_readlane_b32 s47, v2, 43 +; GCN-NEXT: v_readlane_b32 s48, v2, 44 +; GCN-NEXT: v_readlane_b32 s49, v2, 45 +; GCN-NEXT: v_readlane_b32 s50, v2, 46 +; GCN-NEXT: v_readlane_b32 s51, v2, 47 +; GCN-NEXT: v_readlane_b32 s0, v2, 16 +; GCN-NEXT: v_readlane_b32 s1, v2, 17 +; GCN-NEXT: v_readlane_b32 s2, v2, 18 +; GCN-NEXT: v_readlane_b32 s3, v2, 19 +; GCN-NEXT: v_readlane_b32 s4, v2, 20 +; GCN-NEXT: v_readlane_b32 s5, v2, 21 +; GCN-NEXT: v_readlane_b32 s6, v2, 22 +; GCN-NEXT: v_readlane_b32 s7, v2, 23 +; GCN-NEXT: v_readlane_b32 s8, v2, 24 +; GCN-NEXT: v_readlane_b32 s9, v2, 25 +; GCN-NEXT: v_readlane_b32 s10, v2, 26 +; GCN-NEXT: v_readlane_b32 s11, v2, 27 +; GCN-NEXT: v_readlane_b32 s12, v2, 28 +; GCN-NEXT: v_readlane_b32 s13, v2, 29 +; GCN-NEXT: v_readlane_b32 s14, v2, 30 +; GCN-NEXT: v_readlane_b32 s15, v2, 31 +; GCN-NEXT: v_readlane_b32 s16, v2, 0 +; GCN-NEXT: v_readlane_b32 s17, v2, 1 +; GCN-NEXT: v_readlane_b32 s18, v2, 2 +; GCN-NEXT: v_readlane_b32 s19, v2, 3 +; GCN-NEXT: v_readlane_b32 s20, v2, 4 +; GCN-NEXT: v_readlane_b32 s21, v2, 5 +; GCN-NEXT: v_readlane_b32 s22, v2, 6 +; GCN-NEXT: v_readlane_b32 s23, v2, 7 +; GCN-NEXT: v_readlane_b32 s24, v2, 8 +; GCN-NEXT: v_readlane_b32 s25, v2, 9 +; GCN-NEXT: v_readlane_b32 s26, v2, 10 +; GCN-NEXT: v_readlane_b32 s27, v2, 11 +; GCN-NEXT: v_readlane_b32 s28, v2, 12 +; GCN-NEXT: v_readlane_b32 s29, v2, 13 +; GCN-NEXT: v_readlane_b32 s30, v2, 14 +; GCN-NEXT: v_readlane_b32 s31, v2, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def v0 ; GCN-NEXT: ;;#ASMEND @@ -1067,32 +1168,24 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v31, 48 -; GCN-NEXT: v_readlane_b32 s5, v31, 49 -; GCN-NEXT: v_readlane_b32 s6, v31, 50 -; GCN-NEXT: v_readlane_b32 s7, v31, 51 -; GCN-NEXT: v_readlane_b32 s8, v31, 52 -; GCN-NEXT: v_readlane_b32 s9, v31, 53 -; GCN-NEXT: v_readlane_b32 s10, v31, 54 -; GCN-NEXT: v_readlane_b32 s11, v31, 55 -; GCN-NEXT: v_readlane_b32 s12, v31, 56 -; GCN-NEXT: v_readlane_b32 s13, v31, 57 -; GCN-NEXT: v_readlane_b32 s14, v31, 58 -; GCN-NEXT: v_readlane_b32 s15, v31, 59 -; GCN-NEXT: v_readlane_b32 s16, v31, 60 -; GCN-NEXT: v_readlane_b32 s17, v31, 61 -; GCN-NEXT: v_readlane_b32 s18, v31, 62 -; GCN-NEXT: v_readlane_b32 s19, v31, 63 -; GCN-NEXT: s_mov_b64 s[2:3], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s4, v2, 48 +; GCN-NEXT: v_readlane_b32 s5, v2, 49 +; GCN-NEXT: v_readlane_b32 s6, v2, 50 +; GCN-NEXT: v_readlane_b32 s7, v2, 51 +; GCN-NEXT: v_readlane_b32 s8, v2, 52 +; GCN-NEXT: v_readlane_b32 s9, v2, 53 +; GCN-NEXT: v_readlane_b32 s10, v2, 54 +; GCN-NEXT: v_readlane_b32 s11, v2, 55 +; GCN-NEXT: v_readlane_b32 s12, v2, 56 +; GCN-NEXT: v_readlane_b32 s13, v2, 57 +; GCN-NEXT: v_readlane_b32 s14, v2, 58 +; GCN-NEXT: v_readlane_b32 s15, v2, 59 +; GCN-NEXT: v_readlane_b32 s16, v2, 60 +; GCN-NEXT: v_readlane_b32 s17, v2, 61 +; GCN-NEXT: v_readlane_b32 s18, v2, 62 +; GCN-NEXT: v_readlane_b32 s19, v2, 63 ; GCN-NEXT: 
v_readlane_b32 s0, v1, 0 ; GCN-NEXT: v_readlane_b32 s1, v1, 1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[2:3] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -1106,6 +1199,14 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ; use v0 ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB3_2: ; %ret +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir index c2a3aae595afa..fe46379400452 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir @@ -39,7 +39,7 @@ body: | ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v1 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF @@ -49,7 +49,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 @@ -77,7 +77,7 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v1 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF @@ -87,7 +87,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 @@ -137,10 +137,10 @@ body: | ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v2 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -151,7 +151,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -161,7 +161,7 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v2 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -187,10 +187,10 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v2 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -201,7 +201,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -211,7 +211,7 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = IMPLICIT_DEF SI_SPILL_AV64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -258,14 +258,14 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v3 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -277,7 +277,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 
; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -290,8 +290,8 @@ body: | ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v3 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -320,14 +320,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v3 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -339,7 +339,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -352,8 +352,8 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF SI_SPILL_AV96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) @@ -405,16 +405,16 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v4 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -427,7 +427,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -443,9 +443,9 @@ body: | ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v4 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -479,16 +479,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v4 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -501,7 +501,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -517,9 +517,9 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) @@ -576,18 +576,18 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, 
implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v5 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -603,7 +603,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -622,10 +622,10 @@ body: | ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v5 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -664,18 +664,18 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit 
$exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v5 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -691,7 +691,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -710,10 +710,10 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 
- ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF SI_SPILL_AV160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) @@ -775,20 +775,20 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, 
implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v6 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -805,7 +805,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -827,11 +827,11 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v6 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -875,20 +875,20 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -905,7 +905,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -927,11 +927,11 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF SI_SPILL_AV192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) @@ -998,22 +998,22 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v7 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1031,7 +1031,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1056,12 +1056,12 @@ body: | ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: 
$vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v7 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -1110,22 +1110,22 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v7 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1143,7 +1143,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1168,12 +1168,12 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF SI_SPILL_AV224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5) @@ -1245,24 +1245,24 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
- ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v8 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1281,7 +1281,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1309,13 +1309,13 @@ body: | ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v8 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -1369,24 +1369,24 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v8 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1405,7 +1405,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1433,13 +1433,13 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF SI_SPILL_AV256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) @@ -1551,40 +1551,40 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit 
$exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v16 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1615,7 +1615,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1654,34 +1654,34 @@ body: | ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; 
FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v16 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -1775,40 +1775,40 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + 
; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1839,7 +1839,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1878,34 +1878,34 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, 
implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF SI_SPILL_AV512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) @@ -2097,72 +2097,72 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: 
$agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr5 = 
V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 
$agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_v32 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2217,7 +2217,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2288,66 +2288,66 @@ body: | ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr26 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_v32 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -2521,72 +2521,72 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr31_lo16 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit 
$exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = 
V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, 
implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v32 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2641,7 +2641,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2712,66 +2712,66 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, 
implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF SI_SPILL_AV1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) @@ -2813,7 +2813,7 @@ body: | ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a1 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF @@ -2825,7 +2825,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-V2A: liveins: $vgpr0 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 @@ -2853,7 +2853,7 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a1 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF @@ -2863,7 +2863,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 @@ -2917,10 +2917,10 @@ body: | ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a2 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -2937,7 +2937,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2947,7 +2947,7 @@ body: | ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a2 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -2973,10 +2973,10 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed 
$agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a2 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -2987,7 +2987,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -2997,7 +2997,7 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1 = IMPLICIT_DEF SI_SPILL_AV64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -3050,14 +3050,14 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; 
MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a3 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3079,7 +3079,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3089,11 +3089,11 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a3 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -3122,14 +3122,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a3 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3141,7 +3141,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3154,8 +3154,8 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2 = IMPLICIT_DEF SI_SPILL_AV96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) @@ -3215,16 +3215,16 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 
$vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a4 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3251,7 +3251,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3263,13 +3263,13 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a4 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -3303,16 +3303,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a4 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3325,7 +3325,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3341,9 +3341,9 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; 
FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF SI_SPILL_AV128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) @@ -3410,18 +3410,18 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a5 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3453,7 +3453,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3467,15 +3467,15 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a5 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -3514,18 +3514,18 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 
- ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a5 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3541,7 +3541,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3560,10 +3560,10 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF SI_SPILL_AV160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) @@ -3637,20 +3637,20 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a6 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3687,7 +3687,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3703,17 +3703,17 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a6 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -3757,20 +3757,20 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, 
implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3787,7 +3787,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3809,11 +3809,11 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 
killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF SI_SPILL_AV192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) @@ -3894,22 +3894,22 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-V2A-NEXT: 
$agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a7 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -3951,7 +3951,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -3969,19 +3969,19 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed 
$agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a7 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -4030,22 +4030,22 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; 
MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a7 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -4063,7 +4063,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4088,12 +4088,12 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF SI_SPILL_AV224_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5) @@ -4181,24 +4181,24 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = 
V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a8 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -4245,7 +4245,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4265,21 +4265,21 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 
implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a8 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -4333,24 +4333,24 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr7_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; 
MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a8 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -4369,7 +4369,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4397,13 +4397,13 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 
$vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF SI_SPILL_AV256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) @@ -4547,40 +4547,40 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; 
MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a16 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -4667,7 +4667,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a16 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4703,37 +4703,37 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a16 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -4827,40 +4827,40 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = 
V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit 
$exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -4891,7 +4891,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -4930,34 +4930,34 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = 
V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - 
; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 
$vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF SI_SPILL_AV512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) @@ -5213,72 +5213,72 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; MUBUF-V2A-NEXT: 
$vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_av_a32 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -5445,7 +5445,7 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, 
$vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -5513,69 +5513,69 @@ body: | ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; 
FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_av_a32 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 @@ -5749,72 +5749,72 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = 
V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + 
; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, 
implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a32 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -5869,7 +5869,7 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} - ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -5940,66 +5940,66 @@ body: | ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; 
FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 
killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: 
$vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; 
FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF SI_SPILL_AV1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir index b92e70cadcfb8..db34185036c93 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir @@ -67,12 +67,12 @@ body: | ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -114,12 +114,12 @@ body: | ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 4, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -159,18 +159,18 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -182,8 +182,8 @@ body: | ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 
$agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF @@ -217,19 +217,19 @@ body: | ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 4, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -242,9 +242,9 @@ body: | ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5) - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) @@ -277,22 +277,22 @@ body: | ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 
$agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -308,11 +308,11 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s196) into %stack.0, align 4, addrspace 5) @@ -350,23 +350,23 @@ body: | ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 12, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -383,10 +383,10 @@ body: | ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from 
%stack.0, align 4, addrspace 5) - ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) @@ -439,10 +439,10 @@ body: | ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) - ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 4, addrspace 5) @@ -455,16 +455,16 @@ body: | ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 32, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 36, addrspace 5) ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 40, addrspace 5) - ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} - ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -485,7 +485,7 @@ body: | ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s96) into %stack.0 + 32, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -493,12 +493,12 @@ body: | ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s96) from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr14 = 
V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir index 35e0edd8ff10c..4e35ce55635d8 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir @@ -21,76 +21,80 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v1 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: $vgpr0 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v1 ; MUBUF-V2A: liveins: $agpr0 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: $vgpr0 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: $vgpr0 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v1 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: $vgpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v1 ; FLATSCR-V2A: liveins: $agpr0 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: $vgpr0 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: 
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v1 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 
0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0 = IMPLICIT_DEF SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) @@ -111,10 +115,10 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v2 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -122,47 +126,49 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v2 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v2 ; FLATSCR-V2A: liveins: $agpr0, $agpr1 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 + ; 
FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -170,41 +176,43 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v2 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF + 
; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = IMPLICIT_DEF SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) @@ -225,11 +233,11 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v3 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -239,57 +247,59 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $vgpr1 = 
V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v3 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup 
CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -299,50 +309,52 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v3 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION 
escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) @@ -363,12 +375,12 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v4 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -380,67 +392,69 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v4 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; 
MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v4 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 
0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; 
FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -452,59 +466,61 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: 
frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v4 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup 
CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -525,13 +541,13 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v5 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) ; 
MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -545,39 +561,40 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr4 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v5 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -585,39 +602,40 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined 
$vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit 
$exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -631,39 +649,40 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; 
MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v5 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -671,30 +690,31 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $agpr2 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = 
V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) @@ -715,14 +735,14 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v6 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -738,44 +758,45 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined 
$agpr5_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v6 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -783,44 +804,45 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION 
undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: 
$agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -836,44 +858,45 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 
0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, 
$sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -881,34 +904,35 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 
0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) @@ -929,16 +953,16 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v8 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; 
MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -958,54 +982,55 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr7 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v8 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -1013,54 +1038,55 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr2 = 
V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $agpr3 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, 
implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1080,54 +1106,55 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; 
MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; 
MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v8 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -1135,42 +1162,43 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; 
FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; 
FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = 
V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) @@ -1191,24 +1219,24 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v16 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1244,94 +1272,95 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-V2A: 
frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v16 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1343,94 +1372,95 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; 
FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; 
FLATSCR-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1466,94 +1496,95 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup 
CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, 
implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A: frame-setup 
CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1565,74 +1596,75 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr6 = 
V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, 
implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) @@ -1653,40 +1685,40 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v32 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr16_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1754,174 +1786,175 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: 
frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF-V2A: 
frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; MUBUF-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr10 = 
V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; 
MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr30 = 
V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v32 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1941,174 +1974,175 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: 
name: test_spill_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined 
$agpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; FLATSCR-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; 
FLATSCR-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; 
FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF-GFX90A: 
frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2176,174 +2210,175 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION 
undefined $vgpr25_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit 
$exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; 
MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v32 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: 
SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -2363,138 +2398,139 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; 
FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR-GFX90A-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, 
implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr28, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr28, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF SI_SPILL_V1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) @@ -2515,9 +2551,9 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a1 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: $agpr0 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) @@ -2525,19 +2561,20 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a1 ; MUBUF-V2A: liveins: $vgpr0 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: $agpr0 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: $agpr0 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a1 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: $agpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) @@ -2545,50 +2582,53 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a1 ; FLATSCR-V2A: liveins: $vgpr0 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: $agpr0 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: $agpr0 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: $agpr0 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec - ; 
MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a1 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0 = IMPLICIT_DEF SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) @@ -2609,10 +2649,10 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a2 ; MUBUF: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2624,24 +2664,25 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a2 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: $agpr0_agpr1 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a2 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 
0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2653,24 +2694,25 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: $agpr0_agpr1 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -2678,41 +2720,43 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a2 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 
0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1 = IMPLICIT_DEF SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) @@ -2733,11 +2777,11 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a3 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: 
frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2753,29 +2797,30 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a3 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit 
$exec, implicit killed $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a3 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2791,29 +2836,30 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2823,50 +2869,52 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a3 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; 
FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2 = IMPLICIT_DEF SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) @@ -2887,12 +2935,12 @@ body: | 
bb.0.entry: ; MUBUF-LABEL: name: test_spill_a4 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2912,34 +2960,35 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a4 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a4 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2959,34 +3008,35 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined 
$agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; 
MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2998,59 +3048,61 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a4 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: 
frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -3071,13 +3123,13 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a5 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3101,39 +3153,40 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a5 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a5 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3157,39 +3210,40 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3203,39 +3257,40 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a5 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -3243,30 +3298,31 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; 
FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) @@ -3287,14 +3343,14 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a6 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3322,44 +3378,45 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a6 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a6 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3387,44 +3444,45 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 
0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3440,44 +3498,45 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; 
MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 
0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -3485,34 +3544,35 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; 
FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, 
addrspace 5) @@ -3533,16 +3593,16 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a8 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3578,54 +3638,55 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a8 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed 
$agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr7_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a8 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3661,54 +3722,55 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a8 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 
$vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 
$vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3728,54 +3790,55 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup 
CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a8 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined 
$agpr7_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -3783,42 +3846,43 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; 
FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) @@ -3839,24 +3903,24 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a16 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF: frame-setup CFI_INSTRUCTION 
escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3924,94 +3988,95 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a16 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - 
; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr9 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-V2A-NEXT: 
S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a16 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4079,94 +4144,95 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a16 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, 
implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + 
; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -4202,94 +4268,95 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup 
CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 
- ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + 
; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined 
$agpr10_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -4301,74 +4368,75 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION 
escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr8 = 
V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) @@ -4389,40 +4457,40 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a32 ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - 
; MUBUF: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr16_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4554,174 +4622,175 @@ body: | ; MUBUF-NEXT: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a32 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; 
MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; MUBUF-V2A: $vgpr31 
= V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: 
$vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; 
MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: 
$agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-V2A: S_ENDPGM 0 + ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-V2A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec + ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; MUBUF-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; MUBUF-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; MUBUF-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; MUBUF-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; MUBUF-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; MUBUF-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; MUBUF-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; MUBUF-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_a32 - ; FLATSCR: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR: 
frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4853,174 +4922,175 @@ body: | ; FLATSCR-NEXT: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION 
undefined $vgpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR-V2A: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; FLATSCR-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr21 = 
V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; 
FLATSCR-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-V2A: S_ENDPGM 0 + ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr4 = 
V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec + ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; FLATSCR-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr6_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -5088,174 
+5158,175 @@ body: | ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; MUBUF-GFX90A-V2A: frame-setup 
CFI_INSTRUCTION undefined $agpr8_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; MUBUF-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; MUBUF-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; MUBUF-GFX90A-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF-GFX90A-V2A: S_ENDPGM 0 + ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a32 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION 
undefined $agpr2_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -5275,138 +5346,139 @@ body: | ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, 
$vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 - ; 
FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 - ; FLATSCR-GFX90A-V2A: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 - ; FLATSCR-GFX90A-V2A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; FLATSCR-GFX90A-V2A: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; 
FLATSCR-GFX90A-V2A: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 + ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25_lo16 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 
= V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; 
FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; FLATSCR-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-cfi-saves-bug.ll b/llvm/test/CodeGen/AMDGPU/pei-cfi-saves-bug.ll index 8a352e6428caa..b5f144c17ef70 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-cfi-saves-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/pei-cfi-saves-bug.ll 
@@ -7,12 +7,12 @@ define fastcc void @tail_callee() #2 { ; CHECK-LABEL: tail_callee: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: v_writelane_b32 v0, exec_lo, 0 ; CHECK-NEXT: v_writelane_b32 v0, exec_hi, 1 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) @@ -25,13 +25,12 @@ define fastcc void @callee_no_fp() #0 { ; CHECK-LABEL: callee_no_fp: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[16:17] ; CHECK-NEXT: v_writelane_b32 v1, exec_lo, 2 ; CHECK-NEXT: v_writelane_b32 v1, exec_hi, 3 -; CHECK-NEXT: v_writelane_b32 v1, s33, 4 -; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: v_writelane_b32 v1, s30, 0 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v1, s31, 1 @@ -49,27 +48,26 @@ entry: define protected amdgpu_kernel void @kernel() #1 { ; CHECK-LABEL: kernel: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; CHECK-NEXT: s_add_u32 s0, s0, s17 +; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; CHECK-NEXT: s_add_u32 s0, s0, s15 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s32, 0 ; CHECK-NEXT: s_cbranch_scc0 .LBB2_2 ; CHECK-NEXT: ; %bb.1: ; %end ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: .LBB2_2: ; %body -; CHECK-NEXT: s_getpc_b64 s[12:13] -; CHECK-NEXT: s_add_u32 s12, s12, callee_no_fp@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s13, s13, callee_no_fp@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[18:19], s[12:13], 0x0 +; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9] +; CHECK-NEXT: s_getpc_b64 s[8:9] +; CHECK-NEXT: s_add_u32 s8, s8, callee_no_fp@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s9, s9, callee_no_fp@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[16:17], s[8:9], 0x0 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2 -; CHECK-NEXT: s_mov_b32 s12, s14 -; CHECK-NEXT: s_mov_b32 s13, s15 -; CHECK-NEXT: s_mov_b32 s14, s16 +; CHECK-NEXT: s_mov_b64 s[8:9], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] entry: br i1 undef, label %end, label %body @@ -89,13 +87,14 @@ define dso_local fastcc void @func_needs_fp() unnamed_addr #0 { ; CHECK-NEXT: .type func_needs_fp$local,@function ; CHECK-NEXT: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[16:17] +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: 
s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v40, s16, 4 ; CHECK-NEXT: v_writelane_b32 v40, exec_lo, 2 ; CHECK-NEXT: v_writelane_b32 v40, exec_hi, 3 -; CHECK-NEXT: v_writelane_b32 v40, s33, 4 -; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 8ff5d406aeaa6..2f23787cc581b 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -58,36 +58,38 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; CHECK-NEXT: $sgpr4 = COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, 
implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr3 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr33 = COPY $sgpr4 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -156,18 +158,18 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = 
S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr2 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -235,14 +237,14 @@ body: | ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr29 + ; CHECK-NEXT: $vgpr0 = PRED_COPY killed $sgpr29 ; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr29 + ; CHECK-NEXT: $vgpr2 = PRED_COPY killed $sgpr29 ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit 
$sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -315,8 +317,8 @@ body: | ; CHECK-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0 S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index f43b2311da70e..f5cdd9edbefb6 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -54,15 +54,16 @@ body: | ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 
implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -71,18 +72,19 @@ body: | ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; MUBUF-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: 
$sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4 ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs ; FLATSCR: liveins: $vgpr1, $vgpr2 ; FLATSCR-NEXT: {{ $}} - ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4_lo16 @@ -111,16 +113,17 @@ body: | ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 + ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, 
implicit-def $vcc ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc @@ -129,13 +132,14 @@ body: | ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc + ; FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc + ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) + ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc - ; FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 ; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index 0abd93c04ffb4..832fb2d3a8091 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -53,31 +53,33 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 262400, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 
implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 262400 - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; CHECK-NEXT: $sgpr4 = COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc + ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 262400 + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 4096, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 + ; CHECK-NEXT: $vgpr3 = PRED_COPY killed $sgpr33 ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -786432, implicit-def dead $scc - ; CHECK-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; 
CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 262400, implicit-def dead $scc - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr33 = COPY $sgpr4 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index 5ea2553e604d3..912aec808a453 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -24,7 +24,7 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX8-LABEL: name: pei_scavenge_vgpr_spill - ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX8: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -59,15 +59,16 @@ body: | ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GFX8-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; GFX8-NEXT: $sgpr4 = COPY $sgpr33 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -78,17 +79,18 @@ body: | ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384 ; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec + ; GFX8-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, 
implicit-def dead $scc, implicit $exec + ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX8-NEXT: $sgpr33 = COPY $sgpr4 ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX9-LABEL: name: pei_scavenge_vgpr_spill - ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX9: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GFX9-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -123,15 +125,16 @@ body: | ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GFX9-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; GFX9-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef 
$vgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 ; GFX9-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -140,19 +143,20 @@ body: | ; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec + ; GFX9-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill - ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -185,28 +189,30 @@ body: | ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 - ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 - ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; GFX9-FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 + ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832 + ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX9-FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 - ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GFX9-FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll index 5049f4248976d..06691927e3ab8 100644 --- a/llvm/test/CodeGen/AMDGPU/perfhint.ll +++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll @@ -144,9 +144,10 @@ bb: ret void } +; FIXME: This test was intended to be WaveLimiterHint : 0 ; 
GCN-LABEL: {{^}}test_indirect_through_phi: ; GCN: MemoryBound: 0 -; GCN: WaveLimiterHint : 0 +; GCN: WaveLimiterHint : 1 define amdgpu_kernel void @test_indirect_through_phi(float addrspace(1)* %arg) { bb: %load = load float, float addrspace(1)* %arg, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir index e2e6ea76103c7..748bfad168ddc 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir @@ -27,7 +27,7 @@ body: | # CHECK-LABEL: name: foo # CHECK: bb.3: # CHECK-NEXT: dead %2:sreg_32_xm0 = IMPLICIT_DEF -# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4 +# CHECK-NEXT: %3:sreg_32_xm0 = PRED_COPY killed %4 # CHECK-NEXT: S_NOP 0, implicit killed %3 @@ -36,8 +36,8 @@ body: | # With this PHI node order we did not hit the assert, but we used to get # # bb.3: -# dead %3:sreg_32_xm0 = COPY killed %4 -# %2:sreg_32_xm0 = COPY %4 +# dead %3:sreg_32_xm0 = PRED_COPY killed %4 +# %2:sreg_32_xm0 = PRED_COPY %4 # S_NOP 0, implicit killed %2 # # which looks weird regarding killed flags for %4. @@ -65,7 +65,7 @@ body: | # CHECK-LABEL: name: bar # CHECK: bb.3: # CHECK-NEXT: dead %3:sreg_32_xm0 = IMPLICIT_DEF -# CHECK-NEXT: %2:sreg_32_xm0 = COPY killed %4 +# CHECK-NEXT: %2:sreg_32_xm0 = PRED_COPY killed %4 # CHECK-NEXT: S_NOP 0, implicit killed %2 @@ -92,4 +92,4 @@ body: | # CHECK-LABEL: name: bax # CHECK: bb.3: -# CHECK-NEXT: %2:sreg_32_xm0 = COPY killed %3 +# CHECK-NEXT: %2:sreg_32_xm0 = PRED_COPY killed %3 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir index d465e9cbd6b47..9e75527645efc 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -7,7 +7,7 @@ # CHECK: [[IF_INPUT_REG:%[0-9]+]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE0]], implicit $exec # CHECK: bb.1: -# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = COPY killed [[IF_INPUT_REG]] +# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = PRED_COPY killed [[IF_INPUT_REG]] # CHECK: SI_END_CF killed [[END_CF_ARG]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec # CHECK: bb.2: @@ -25,7 +25,7 @@ body: | successors: %bb.3(0x40000000), %bb.2(0x40000000) liveins: $vgpr0 - %5:vgpr_32(s32) = COPY $vgpr0 + %5:vgpr_32(s32) = PRED_COPY $vgpr0 %0:sreg_64 = V_CMP_EQ_U32_e64 0, %5(s32), implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %22:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -47,7 +47,7 @@ body: | %16:sreg_32_xm0 = S_MOV_B32 -1 %17:sgpr_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - %19:vgpr_32 = COPY %4 + %19:vgpr_32 = PRED_COPY %4 %20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll b/llvm/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll deleted file mode 100644 index 7a2e954866cbd..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=fiji -print-after=amdgpu-isel -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SI %s - -; This checks that the -print-after of MIR containing a 
target custom pseudo -; value works correctly. - -; SI: ImageResource - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -target triple = "amdgcn--amdpal" - -define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr { -.entry: - %res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) - ret <2 x float> %res -} - -declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll index c5c6467550ef9..11027455934ea 100644 --- a/llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ b/llvm/test/CodeGen/AMDGPU/private-element-size.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s ; ALL-LABEL: {{^}}private_elt_size_v4i32: @@ -252,3 +252,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll index 15e8604930d59..7cc4d4fc0ecb0 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -80,7 +80,7 @@ declare i32 @foo(i32 addrspace(5)*) #0 ; ASM: buffer_store_dword ; ASM: buffer_store_dword ; ASM: s_swappc_b64 -; ASM: ScratchSize: 16400 +; ASM: ScratchSize: 16 define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 { entry: %tmp = alloca [2 x i32], addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll index 27d2f03283093..158a5da61cf0d 100644 --- 
a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s -; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s +; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s ; ALL-LABEL: {{^}}promote_alloca_i32_array_array: ; NOOPTS: workgroup_group_segment_byte_size = 0{{$}} @@ -36,3 +36,6 @@ entry: attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" } attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll index d63bd451da53f..99c28418c6ec5 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -disable-promote-alloca-to-vector -amdgpu-enable-lower-module-lds=0 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -disable-promote-alloca-to-vector -amdgpu-enable-lower-module-lds=0 < %s | FileCheck -check-prefix=GCN %s ; This shows that the amount LDS size estimate should try to not be ; sensitive to the order of the LDS globals. 
This should try to @@ -130,3 +130,6 @@ entry: } attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll new file mode 100644 index 0000000000000..210d04265ccae --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-propagate-attributes-late %s | FileCheck %s + +; CHECK: define internal void @max_flat_1_1024() #0 { +define internal void @max_flat_1_1024() #0 { + ret void +} + +; CHECK: define internal void @max_flat_1_256() #1 { +define internal void @max_flat_1_256() #1 { + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_default() #1 { +define amdgpu_kernel void @kernel_1_256_call_default() #1 { + call void @default() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { +define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { + call void @max_flat_1_256() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_64_64() #1 { +define amdgpu_kernel void @kernel_1_256_call_64_64() #1 { + call void @max_flat_64_64() + ret void +} + +; CHECK: define internal void @max_flat_64_64() #2 { +define internal void @max_flat_64_64() #2 { + ret void +} + +; CHECK: define internal void @default() #2 { +define internal void @default() #3 { + ret void +} + +attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" } +attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" } +attributes #2 = { noinline "amdgpu-flat-work-group-size"="64,64" } +attributes #3 = { noinline } + +; CHECK: attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" +; CHECK-NEXT: attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" +; CHECK-NEXT: attributes #2 = { noinline "amdgpu-flat-work-group-size"="1,256" diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll deleted file mode 100644 index d2ae6cf60681a..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ /dev/null @@ -1,214 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s - -; Check propagation of amdgpu-flat-work-group-size attribute. 
- -; Called from a single kernel with 1,256 -define internal void @default_to_1_256() { -; CHECK-LABEL: define {{[^@]+}}@default_to_1_256 -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -define amdgpu_kernel void @kernel_1_256() #0 { -; CHECK-LABEL: define {{[^@]+}}@kernel_1_256 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: call void @default_to_1_256() -; CHECK-NEXT: ret void -; - call void @default_to_1_256() - ret void -} - -; Called from a single kernel with 64,128 -define internal void @default_to_64_128() { -; CHECK-LABEL: define {{[^@]+}}@default_to_64_128 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -define amdgpu_kernel void @kernel_64_128() #1 { -; CHECK-LABEL: define {{[^@]+}}@kernel_64_128 -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @default_to_64_128() -; CHECK-NEXT: call void @flat_group_64_64() -; CHECK-NEXT: call void @default_to_64_256() -; CHECK-NEXT: call void @flat_group_128_256() -; CHECK-NEXT: ret void -; - call void @default_to_64_128() - call void @flat_group_64_64() - call void @default_to_64_256() - call void @flat_group_128_256() - ret void -} - -; Called from kernels with 128,512 and 512,512 -define internal void @default_to_128_512() { -; CHECK-LABEL: define {{[^@]+}}@default_to_128_512 -; CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -; This already has a strict bounds, but called from kernels with wider -; bounds, and should not be changed. -define internal void @flat_group_64_64() #2 { -; CHECK-LABEL: define {{[^@]+}}@flat_group_64_64 -; CHECK-SAME: () #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -; 128,256 -> 128,128 -define internal void @flat_group_128_256() #3 { -; CHECK-LABEL: define {{[^@]+}}@flat_group_128_256 -; CHECK-SAME: () #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -define internal void @flat_group_512_1024() #4 { -; CHECK-LABEL: define {{[^@]+}}@flat_group_512_1024 -; CHECK-SAME: () #[[ATTR5:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -define amdgpu_kernel void @kernel_128_512() #5 { -; CHECK-LABEL: define {{[^@]+}}@kernel_128_512 -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: call void @default_to_128_512() -; CHECK-NEXT: call void @flat_group_64_64() -; CHECK-NEXT: ret void -; - call void @default_to_128_512() - call void @flat_group_64_64() - ret void -} - -define amdgpu_kernel void @kernel_512_512() #6 { -; CHECK-LABEL: define {{[^@]+}}@kernel_512_512 -; CHECK-SAME: () #[[ATTR5]] { -; CHECK-NEXT: call void @default_to_128_512() -; CHECK-NEXT: call void @flat_group_512_1024() -; CHECK-NEXT: ret void -; - call void @default_to_128_512() - call void @flat_group_512_1024() - ret void -} - -; Called from kernels with 128,256 and 64,128 => 64,256 -define internal void @default_to_64_256() { -; CHECK-LABEL: define {{[^@]+}}@default_to_64_256 -; CHECK-SAME: () #[[ATTR6:[0-9]+]] { -; CHECK-NEXT: ret void -; - ret void -} - -; The kernel's lower bound is higher than the callee's lower bound, so -; this should probably be illegal. 
-define amdgpu_kernel void @kernel_128_256() #3 { -; CHECK-LABEL: define {{[^@]+}}@kernel_128_256 -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { -; CHECK-NEXT: call void @default_to_64_256() -; CHECK-NEXT: ret void -; - call void @default_to_64_256() - ret void -} - -; 64,128 -> 64,128 -define internal void @merge_cycle_0() #1 { -; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0 -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @merge_cycle_1() -; CHECK-NEXT: ret void -; - call void @merge_cycle_1() - ret void -} - -; 128,256 -> 128,128 -define internal void @merge_cycle_1() #3 { -; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1 -; CHECK-SAME: () #[[ATTR4]] { -; CHECK-NEXT: call void @merge_cycle_0() -; CHECK-NEXT: ret void -; - call void @merge_cycle_0() - ret void -} - -define amdgpu_kernel void @kernel_64_256() #7 { -; CHECK-LABEL: define {{[^@]+}}@kernel_64_256 -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @merge_cycle_0() -; CHECK-NEXT: call void @default_captured_address() -; CHECK-NEXT: call void @externally_visible_default() -; CHECK-NEXT: [[F32:%.*]] = call float bitcast (i32 ()* @bitcasted_function to float ()*)() -; CHECK-NEXT: ret void -; - call void @merge_cycle_0() - call void @default_captured_address() - call void @externally_visible_default() - %f32 = call float bitcast (i32 ()* @bitcasted_function to float ()*)() - ret void -} - -define internal void @default_captured_address() { -; CHECK-LABEL: define {{[^@]+}}@default_captured_address -; CHECK-SAME: () #[[ATTR8:[0-9]+]] { -; CHECK-NEXT: store volatile void ()* @default_captured_address, void ()** undef, align 8 -; CHECK-NEXT: ret void -; - store volatile void ()* @default_captured_address, void ()** undef, align 8 - ret void -} - -define void @externally_visible_default() { -; CHECK-LABEL: define {{[^@]+}}@externally_visible_default -; CHECK-SAME: () #[[ATTR8]] { -; CHECK-NEXT: ret void -; - ret void -} - -; 1,1024 -> 64,256 -define internal i32 @bitcasted_function() { -; CHECK-LABEL: define {{[^@]+}}@bitcasted_function -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: ret i32 0 -; - ret i32 0 -} - -attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } -attributes #1 = { "amdgpu-flat-work-group-size"="64,128" } -attributes #2 = { "amdgpu-flat-work-group-size"="64,64" } -attributes #3 = { "amdgpu-flat-work-group-size"="128,256" } -attributes #4 = { "amdgpu-flat-work-group-size"="512,1024" } -attributes #5 = { "amdgpu-flat-work-group-size"="128,512" } -attributes #6 = { "amdgpu-flat-work-group-size"="512,512" } -attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } -;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir index b1f864fa92eac..52653df26d9f7 100644 --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -86,8 +86,8 @@ body: | ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr68 ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 - ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = PRED_COPY killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + ; CHECK-NEXT: renamable $sgpr52 = PRED_COPY renamable $sgpr68 ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) @@ -126,8 +126,8 @@ body: | ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr33 - ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr60 = PRED_COPY killed renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr62 = PRED_COPY killed renamable $sgpr15 ; CHECK-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr16, %stack.0, implicit $exec, implicit $sgpr32 :: (store 
(s32) into %stack.0, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, CustomRegMask($sgpr60,$sgpr62) @@ -158,7 +158,7 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead %27:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec + ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) @@ -166,7 +166,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec ; CHECK-NEXT: renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: dead %30:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) @@ -182,40 +182,40 @@ body: | ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1) - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec ; CHECK-NEXT: renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35 - ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = PRED_COPY killed renamable $sgpr6_sgpr7 ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr52_sgpr53 - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11 + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = PRED_COPY killed renamable $sgpr10_sgpr11 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable 
$sgpr12_sgpr13 + ; CHECK-NEXT: renamable $sgpr42_sgpr43 = PRED_COPY killed renamable $sgpr12_sgpr13 ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr33 ; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr36 = COPY killed renamable $sgpr16 - ; CHECK-NEXT: renamable $sgpr37 = COPY killed renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr40 = COPY killed renamable $sgpr8 - ; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr18_sgpr19 - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr20_sgpr21 - ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr22_sgpr23 - ; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr24_sgpr25 + ; CHECK-NEXT: renamable $sgpr36 = PRED_COPY killed renamable $sgpr16 + ; CHECK-NEXT: renamable $sgpr37 = PRED_COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr40 = PRED_COPY killed renamable $sgpr8 + ; CHECK-NEXT: renamable $sgpr44_sgpr45 = PRED_COPY killed renamable $sgpr18_sgpr19 + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = PRED_COPY killed renamable $sgpr20_sgpr21 + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = PRED_COPY killed renamable $sgpr22_sgpr23 + ; CHECK-NEXT: renamable $sgpr50_sgpr51 = PRED_COPY killed renamable $sgpr24_sgpr25 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 - ; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr50_sgpr51 - ; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr48_sgpr49 - ; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr46_sgpr47 - ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr44_sgpr45 - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr42_sgpr43 - ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr40 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr37 - ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr36 - ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr24_sgpr25 = PRED_COPY killed renamable $sgpr50_sgpr51 + ; CHECK-NEXT: renamable $sgpr22_sgpr23 = PRED_COPY killed renamable $sgpr48_sgpr49 + ; CHECK-NEXT: renamable $sgpr20_sgpr21 = PRED_COPY killed renamable $sgpr46_sgpr47 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = PRED_COPY killed renamable $sgpr44_sgpr45 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr42_sgpr43 + ; CHECK-NEXT: renamable $sgpr8 = PRED_COPY killed renamable $sgpr40 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = PRED_COPY killed renamable $sgpr38_sgpr39 + ; CHECK-NEXT: renamable $sgpr15 = PRED_COPY killed renamable $sgpr37 + ; CHECK-NEXT: renamable $sgpr16 = PRED_COPY killed renamable $sgpr36 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = PRED_COPY killed renamable $sgpr52_sgpr53 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $exec = S_MOV_B64_term 
renamable $sgpr92_sgpr93 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll index 9aedfad6fe320..484e3432c4d0f 100644 --- a/llvm/test/CodeGen/AMDGPU/recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/recursion.ll @@ -32,7 +32,7 @@ define void @tail_recursive_with_stack() { ; For an arbitrary recursive call, report a large number for unknown stack ; usage for code object v4 and older ; CHECK-LABEL: {{^}}calls_recursive: -; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}} +; CHECK: .amdhsa_private_segment_fixed_size 16{{$}} ; ; V5-LABEL: {{^}}calls_recursive: ; V5: .amdhsa_private_segment_fixed_size 0{{$}} @@ -56,7 +56,7 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() { ; in the kernel. ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive: -; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}} +; CHECK: .amdhsa_private_segment_fixed_size 0{{$}} ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive: ; V5: .amdhsa_private_segment_fixed_size 0{{$}} @@ -67,7 +67,7 @@ define amdgpu_kernel void @kernel_calls_tail_recursive() { } ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack: -; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}} +; CHECK: .amdhsa_private_segment_fixed_size 8{{$}} ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack: ; V5: .amdhsa_private_segment_fixed_size 8{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir index 09be927dc952e..649f15fd59567 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir @@ -52,13 +52,13 @@ body: | ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:vreg_256 = PRED_COPY [[SI_SPILL_V256_RESTORE1]] ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit [[COPY]] + ; CHECK-NEXT: S_NOP 0, implicit [[PRED_COPY]] ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]] ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir index 258377f5668d1..d4f55dedae181 100644 --- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir +++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir @@ -40,10 +40,10 @@ name: tbuffer_store2 body: | bb.0: ; CHECK-LABEL: name: tbuffer_store2 - ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, 
killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; CHECK-NEXT: S_ENDPGM 0 - TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) S_ENDPGM 0 ... @@ -340,10 +340,10 @@ name: image_store body: | bb.0: ; CHECK-LABEL: name: image_store - ; CHECK: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "ImageResource") + ; CHECK: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; CHECK-NEXT: S_ENDPGM 0 - IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "ImageResource") + IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) S_ENDPGM 0 ... 
@@ -403,9 +403,9 @@ name: image_atomic body: | bb.0: ; CHECK-LABEL: name: image_atomic - ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") + ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll new file mode 100644 index 0000000000000..56fb98784392c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll @@ -0,0 +1,455 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=bonaire -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX7,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX7 %s < %t +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s + +; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX8,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX8 %s < %t +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s + +; RUN: llc -march=amdgcn -mcpu=gfx906 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX906,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX906 %s < %t +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s + +; RUN: llc -march=amdgcn -mcpu=gfx90a -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX90A,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX90A %s < %t +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s + +; RUN: llc -march=amdgcn -mcpu=gfx1011 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX10,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX10 %s < %t +; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s + +; RUN: llc -march=amdgcn -mcpu=gfx1100 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX11,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX11 %s < %t +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s + +; Note: This test checks the IR, but also has a run line to codegen the file just to check we +; do not crash when trying to select those functions. 
+ +; WARN-GFX7: needs_dpp: removing function: +dpp is not supported on the current target +; WARN-GFX7: needs_16bit_insts: removing function: +16-bit-insts is not supported on the current target +; WARN-GFX7: needs_gfx8_insts: removing function: +gfx8-insts is not supported on the current target +; WARN-GFX7: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target +; WARN-GFX7: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX7: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX7: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX7: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX7: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX7: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX7: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX7: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX7: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target +; WARN-GFX7: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX7-NOT: not supported + +; WARN-GFX8: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target +; WARN-GFX8: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX8: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX8: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX8: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX8: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX8: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX8: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX8: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX8: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target +; WARN-GFX8: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX8-NOT: not supported + +; WARN-GFX906: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX906: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX906: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX906: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX906: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX906: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX906: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX906-NOT: not supported + +; WARN-GFX90A: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX90A: needs_gfx11_insts: removing function: 
+gfx11-insts is not supported on the current target +; WARN-GFX90A: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX90A-NOT: not supported + +; WARN-GFX10: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX10: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX10: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX10: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX10-NOT: not supported + +; WARN-GFX11: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX11: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX11: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX11: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX11: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX11-NOT: not supported + +; GFX7: @GVRefs {{.*}} zeroinitializer +; GFX8: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null] +; GFX906: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot7_insts, ptr null] +; GFX90A: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr @needs_dot3_insts, ptr @needs_dot4_insts, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null] +; GFX10: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr @needs_gfx10_insts, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null] +; GFX11: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr @needs_gfx10_insts, ptr @needs_gfx11_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot5_insts, ptr null, ptr @needs_dot7_insts, ptr @needs_dot8_insts] +@GVRefs = internal global [14 x ptr] [ + ptr @needs_dpp, + ptr @needs_16bit_insts, + ptr @needs_gfx8_insts, + ptr @needs_gfx9_insts, + ptr @needs_gfx10_insts, + ptr @needs_gfx11_insts, + ptr @needs_dot1_insts, + ptr @needs_dot2_insts, + ptr @needs_dot3_insts, + ptr @needs_dot4_insts, + ptr @needs_dot5_insts, + ptr @needs_dot6_insts, + ptr @needs_dot7_insts, + ptr @needs_dot8_insts +] + +; GFX7: @ConstantExpr = internal global i64 0 +@ConstantExpr = internal global i64 ptrtoint (ptr @needs_dpp to i64) + +define void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #0 { +; GFX7-NOT: define void @needs_dpp( +; GFX8: define void @needs_dpp( +; GFX9: define void @needs_dpp( +; GFX10: define void @needs_dpp( +; GFX11: define void @needs_dpp( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, 
ptr %out + ret void +} + +define void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #1 { +; GFX7-NOT: define void @needs_16bit_insts( +; GFX8: define void @needs_16bit_insts( +; GFX9: define void @needs_16bit_insts( +; GFX10: define void @needs_16bit_insts( +; GFX11: define void @needs_16bit_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #2 { +; GFX7-NOT: define void @needs_gfx8_insts( +; GFX8: define void @needs_gfx8_insts( +; GFX9: define void @needs_gfx8_insts( +; GFX10: define void @needs_gfx8_insts( +; GFX11: define void @needs_gfx8_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #3 { +; GFX7-NOT: define void @needs_gfx9_insts( +; GFX8-NOT: define void @needs_gfx9_insts( +; GFX9: define void @needs_gfx9_insts( +; GFX10: define void @needs_gfx9_insts( +; GFX11: define void @needs_gfx9_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #4 { +; GFX7-NOT: define void @needs_gfx10_insts( +; GFX8-NOT: define void @needs_gfx10_insts( +; GFX9-NOT: define void @needs_gfx10_insts( +; GFX10: define void @needs_gfx10_insts( +; GFX11: define void @needs_gfx10_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #5 { +; GFX7-NOT: define void @needs_gfx11_insts( +; GFX8-NOT: define void @needs_gfx11_insts( +; GFX9-NOT: define void @needs_gfx11_insts( +; GFX10-NOT: define void @needs_gfx11_insts( +; GFX11: define void @needs_gfx11_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #6 { +; GFX7-NOT: define void @needs_dot1_insts( +; GFX8-NOT: define void @needs_dot1_insts( +; GFX9: define void @needs_dot1_insts( +; GFX10: define void @needs_dot1_insts( +; GFX11-NOT: define void @needs_dot1_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #7 { +; GFX7-NOT: define void @needs_dot2_insts( +; GFX8-NOT: define void @needs_dot2_insts( +; GFX9: define void @needs_dot2_insts( +; GFX10: define void @needs_dot2_insts( +; GFX11-NOT: define void @needs_dot2_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret 
void +} + +define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #8 { +; GFX7-NOT: define void @needs_dot3_insts( +; GFX8-NOT: define void @needs_dot3_insts( +; GFX906-NOT: define void @needs_dot3_insts( +; GFX90A: define void @needs_dot3_insts( +; GFX10-NOT: define void @needs_dot3_insts( +; GFX11-NOT: define void @needs_dot3_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + + +define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #9 { +; GFX7-NOT: define void @needs_dot4_insts( +; GFX8-NOT: define void @needs_dot4_insts( +; GFX906-NOT: define void @needs_dot4_insts( +; GFX90A: define void @needs_dot4_insts( +; GFX10-NOT: define void @needs_dot4_insts( +; GFX11-NOT: define void @needs_dot4_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #10 { +; GFX7-NOT: define void @needs_dot5_insts( +; GFX8-NOT: define void @needs_dot5_insts( +; GFX906-NOT: define void @needs_dot5_insts( +; GFX90A: define void @needs_dot5_insts( +; GFX10: define void @needs_dot5_insts( +; GFX11: define void @needs_dot5_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #11 { +; GFX7-NOT: define void @needs_dot6_insts( +; GFX8-NOT: define void @needs_dot6_insts( +; GFX906-NOT: define void @needs_dot6_insts( +; GFX90A: define void @needs_dot6_insts( +; GFX10: define void @needs_dot6_insts( +; GFX11-NOT: define void @needs_dot6_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #12 { +; GFX7-NOT: define void @needs_dot7_insts( +; GFX8-NOT: define void @needs_dot7_insts( +; GFX9: define void @needs_dot7_insts( +; GFX10: define void @needs_dot7_insts( +; GFX11: define void @needs_dot7_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #13 { +; GFX7-NOT: define void @needs_dot8_insts( +; GFX8-NOT: define void @needs_dot8_insts( +; GFX9-NOT: define void @needs_dot8_insts( +; GFX10-NOT: define void @needs_dot8_insts( +; GFX11: define void @needs_dot8_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +; IR: define void @caller( +define void @caller(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) { + ; GFX7: call void null( + ; GFX8: call void @needs_dpp( + ; GFX9: call void @needs_dpp( + ; GFX10: call void @needs_dpp( + ; GFX11: call void @needs_dpp( + call void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void @needs_16bit_insts( + ; GFX9: call void @needs_16bit_insts( + ; GFX10: call void @needs_16bit_insts( + ; GFX11: call void @needs_16bit_insts( + call void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void @needs_gfx8_insts( + ; GFX9: call void @needs_gfx8_insts( + ; GFX10: call void @needs_gfx8_insts( + ; GFX11: call void @needs_gfx8_insts( + call void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_gfx9_insts( + ; GFX10: call void @needs_gfx9_insts( + ; GFX111: call void @needs_gfx9_insts(c + call void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void 
@needs_gfx10_insts( + ; GFX111: call void @needs_gfx10_insts( + call void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void null( + ; GFX11: call void @needs_gfx11_insts( + call void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot1_insts( + ; GFX10: call void @needs_dot1_insts( + ; GFX11: call void null( + call void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot2_insts( + ; GFX10: call void @needs_dot2_insts( + ; GFX11: call void null( + call void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot3_insts( + ; GFX10: call void null( + ; GFX11: call void null( + call void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot4_insts( + ; GFX10: call void null( + ; GFX11: call void null( + call void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot5_insts( + ; GFX10: call void @needs_dot5_insts( + ; GFX11: call void @needs_dot5_insts( + call void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot6_insts( + ; GFX10: call void @needs_dot6_insts( + ; GFX11: call void null( + call void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot7_insts( + ; GFX10: call void @needs_dot7_insts( + ; GFX11: call void @needs_dot7_insts( + call void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void null( + ; GFX11: call void @needs_dot8_insts( + call void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; IR: ret void + ret void +} + +attributes #0 = { "target-features"="+dpp" } +attributes #1 = { "target-features"="+16-bit-insts" } +attributes #2 = { "target-features"="+gfx8-insts" } +attributes #3 = { "target-features"="+gfx9-insts" } +attributes #4 = { "target-features"="+gfx10-insts" } +attributes #5 = { "target-features"="+gfx11-insts" } +attributes #6 = { "target-features"="+dot1-insts" } +attributes #7 = { "target-features"="+dot2-insts" } +attributes #8 = { "target-features"="+dot3-insts" } +attributes #9 = { "target-features"="+dot4-insts" } +attributes #10 = { "target-features"="+dot5-insts" } +attributes #11 = { "target-features"="+dot6-insts" } +attributes #12 = { "target-features"="+dot7-insts" } +attributes #13 = { "target-features"="+dot8-insts" } diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll index d1ec9d8afaaf9..db52ce76acebb 100644 --- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S 
-passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s target datalayout = "n32" @@ -500,3 +500,6 @@ attributes #3 = { nounwind "uniform-work-group-size"="false" } !1 = !{i32 8, i32 16} !2 = !{i64 8, i64 16, i64 2} !3 = !{i16 8, i16 16, i16 2} + +!llvm.module.flags = !{!4} +!4 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll index 1c8b8be33b0ad..27c910faf0081 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -o - %s | FileCheck -check-prefix=GCN-V5 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN-V5 %s ; Make sure there's no assertion when trying to report the resource ; usage for a function which becomes dead during codegen. @@ -21,7 +21,7 @@ define internal fastcc void @unreachable() { ; GCN: s_endpgm ; GCN: .amdhsa_private_segment_fixed_size 0 -; GCN-NOT: .amdhsa_uses_dynamic_stack 0 +; GCN: .amdhsa_uses_dynamic_stack 0 ; GCN-V5: .amdhsa_uses_dynamic_stack 0 define amdgpu_kernel void @entry() { bb0: diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress_cfi.ll b/llvm/test/CodeGen/AMDGPU/returnaddress_cfi.ll index 392892fdaae9c..01a8c4bf4f84e 100644 --- a/llvm/test/CodeGen/AMDGPU/returnaddress_cfi.ll +++ b/llvm/test/CodeGen/AMDGPU/returnaddress_cfi.ll @@ -11,11 +11,16 @@ define hidden void @_ZL3barv_spill_RA_to_vgpr() #0 { ; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 ; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; ; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2560, 12800 +; CHECK-NEXT: s_mov_b64 exec, -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill ; CHECK-NEXT: .cfi_offset 2600, 1228 -; CHECK-NEXT: s_mov_b64 exec, s[16:17] +; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK: v_writelane_b32 v40, s30, 32 ; CHECK-NEXT: v_writelane_b32 v40, s31, 33 @@ -36,9 +41,15 @@ define hidden void @_ZL3barv_spill_RA_to_vgpr() #0 { ; CHECK-DAG: v_readlane_b32 s30, v40, 32 ; CHECK-DAG: v_readlane_b32 s31, v40, 33 -; CHECK: s_or_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK: v_readlane_b32 s4, v40, 36 +; CHECK-NEXT: v_readlane_b32 s66, v40, 34 +; CHECK-NEXT: v_readlane_b32 s67, v40, 35 +; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, -1 +; CHECK-NEXT: 
buffer_load_dword v40, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -80,23 +91,33 @@ define hidden void @_ZL3barv_spill_RA_to_memory() #0 { ; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 ; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; ; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, s33 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 65, 24320 +; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2560, 21504 +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_mov_b32_e32 v1, s16 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 65, 22272 +; CHECK-NEXT: v_mov_b32_e32 v1, s66 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1090, 21760 +; CHECK-NEXT: v_mov_b32_e32 v1, s67 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1091, 21760 ; CHECK-NEXT: .cfi_def_cfa_register 65 -; CHECK-NEXT: s_add_i32 s32, s32, 0x6400 +; CHECK-NEXT: s_add_i32 s32, s32, 0x5c00 ; CHECK: s_waitcnt vmcnt(0) -; CHECK: s_mov_b64 exec, s[16:17] -; CHECK: s_mov_b64 s[16:17], exec +; CHECK: s_mov_b64 exec, s[18:19] +; CHECK: s_mov_b64 s[18:19], exec ; CHECK: s_mov_b64 exec, 3 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:384 -; CHECK-NEXT: v_writelane_b32 v0, s30, 0 -; CHECK-NEXT: v_writelane_b32 v0, s31, 1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 16, 23808 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:384 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:352 +; CHECK-NEXT: v_writelane_b32 v1, s30, 0 +; CHECK-NEXT: v_writelane_b32 v1, s31, 1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:352 ; CHECK: ;;#ASMSTART ; CHECK-NEXT: ; clobber nonpreserved and 32 CSR SGPRs @@ -113,22 +134,36 @@ define hidden void @_ZL3barv_spill_RA_to_memory() #0 { ; CHECK: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: s_or_saveexec_b64 s[66:67], -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[66:67] ; CHECK-NEXT: s_mov_b64 s[4:5], exec ; CHECK-NEXT: s_mov_b64 exec, 3 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:384 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:352 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readlane_b32 s30, v0, 0 -; CHECK-NEXT: v_readlane_b32 s31, v0, 1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:384 +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:352 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; 
CHECK: s_add_i32 s32, s32, 0xffff9c00 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload +; CHECK: buffer_load_dword v0, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s33, v0 +; CHECK-NEXT: v_readfirstlane_b32 s66, v0 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s67, v0 +; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_add_i32 s32, s32, 0xffffa400 ; CHECK-NEXT: .cfi_def_cfa_register 64 +; CHECK-NEXT: s_mov_b32 s33, s4 +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: call void asm sideeffect "; clobber nonpreserved and 32 CSR SGPRs", diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll similarity index 51% rename from llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll rename to llvm/test/CodeGen/AMDGPU/roundeven.ll index e263c2e5be17c..0326d4895e853 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll @@ -5,6 +5,12 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SDAG_GFX6 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=SDAG_GFX7 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=SDAG_GFX8 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG_GFX9 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX10 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX11 %s define float @v_roundeven_f32(float %x) { ; GFX6-LABEL: v_roundeven_f32: @@ -37,6 +43,37 @@ define float @v_roundeven_f32(float %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f32: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f32: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f32: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f32: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; 
SDAG_GFX10PLUS-LABEL: v_roundeven_f32: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call float @llvm.roundeven.f32(float %x) ret float %roundeven } @@ -77,6 +114,42 @@ define <2 x float> @v_roundeven_v2f32(<2 x float> %x) { ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v2f32: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v2f32: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v2f32: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v2f32: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x) ret <2 x float> %roundeven } @@ -122,6 +195,47 @@ define <3 x float> @v_roundeven_v3f32(<3 x float> %x) { ; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v3f32: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v3f32: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v3f32: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v3f32: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, 
v0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x) ret <3 x float> %roundeven } @@ -172,6 +286,52 @@ define <4 x float> @v_roundeven_v4f32(<4 x float> %x) { ; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 ; GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v4f32: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v4f32: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v4f32: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v4f32: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x) ret <4 x float> %roundeven } @@ -211,6 +371,41 @@ define half @v_roundeven_f16(half %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f16: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f16: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f16: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f16: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_f16: +; SDAG_GFX10PLUS: ; %bb.0: +; 
SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call half @llvm.roundeven.f16(half %x) ret half %roundeven } @@ -274,6 +469,63 @@ define <2 x half> @v_roundeven_v2f16(<2 x half> %x) { ; GFX11-NEXT: v_rndne_f16_e32 v1, v1 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v2f16: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v2f16: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v2f16: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v2f16: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10-LABEL: v_roundeven_v2f16: +; SDAG_GFX10: ; %bb.0: +; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX11-LABEL: v_roundeven_v2f16: +; SDAG_GFX11: ; %bb.0: +; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1 +; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31] %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x) ret <2 x half> %roundeven } @@ -351,6 +603,71 @@ define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) { ; GFX11-NEXT: v_rndne_f16_e32 v1, v1 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SDAG_GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG_GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; SDAG_GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX6-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SDAG_GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG_GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; SDAG_GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX8-NEXT: v_rndne_f16_e64 v0, -v0 +; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX9-NEXT: v_rndne_f16_e64 v0, -v0 +; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX10: ; %bb.0: +; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX10-NEXT: v_rndne_f16_e64 v0, -v0 +; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg: +; SDAG_GFX11: ; %bb.0: +; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SDAG_GFX11-NEXT: v_rndne_f16_e64 v0, -v0 +; SDAG_GFX11-NEXT: v_rndne_f16_e64 v1, -v1 +; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31] %x.fneg = fneg <2 x half> %x %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg) ret <2 x half> %roundeven @@ -441,6 +758,88 @@ define <4 x half> @v_roundeven_v4f16(<4 x half> %x) { ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX11-NEXT: v_pack_b32_f16 v1, v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v4f16: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v4f16: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2 +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v3, v3 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v4f16: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX8-NEXT: v_rndne_f16_e32 v1, v1 +; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; SDAG_GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v4f16: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX9-NEXT: v_rndne_f16_e32 v1, v1 +; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG_GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10-LABEL: v_roundeven_v4f16: +; SDAG_GFX10: ; %bb.0: +; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX10-NEXT: v_rndne_f16_e32 v1, v1 +; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG_GFX10-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX11-LABEL: v_roundeven_v4f16: +; SDAG_GFX11: ; %bb.0: +; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v2, v2 +; SDAG_GFX11-NEXT: v_rndne_f16_e32 v3, v3 +; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v2 +; SDAG_GFX11-NEXT: v_pack_b32_f16 v1, v1, v3 +; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %roundeven } @@ -477,6 +876,37 @@ define float @v_roundeven_f32_fabs(float %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0| ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f32_fabs: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e64 v0, |v0| +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f32_fabs: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e64 v0, |v0| +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f32_fabs: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e64 v0, |v0| +; SDAG_GFX8-NEXT: s_setpc_b64 
s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f32_fabs: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e64 v0, |v0| +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, |v0| +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) %roundeven = call float @llvm.roundeven.f32(float %fabs.x) ret float %roundeven @@ -507,6 +937,31 @@ define amdgpu_ps float @s_roundeven_f32(float inreg %x) { ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, s0 ; GFX10PLUS-NEXT: ; return to shader part epilog +; +; SDAG_GFX6-LABEL: s_roundeven_f32: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, s0 +; SDAG_GFX6-NEXT: ; return to shader part epilog +; +; SDAG_GFX7-LABEL: s_roundeven_f32: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, s0 +; SDAG_GFX7-NEXT: ; return to shader part epilog +; +; SDAG_GFX8-LABEL: s_roundeven_f32: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, s0 +; SDAG_GFX8-NEXT: ; return to shader part epilog +; +; SDAG_GFX9-LABEL: s_roundeven_f32: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, s0 +; SDAG_GFX9-NEXT: ; return to shader part epilog +; +; SDAG_GFX10PLUS-LABEL: s_roundeven_f32: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, s0 +; SDAG_GFX10PLUS-NEXT: ; return to shader part epilog %roundeven = call float @llvm.roundeven.f32(float %x) ret float %roundeven } @@ -542,6 +997,37 @@ define float @v_roundeven_f32_fneg(float %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f32_fneg: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_rndne_f32_e64 v0, -v0 +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f32_fneg: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f32_e64 v0, -v0 +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f32_fneg: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f32_e64 v0, -v0 +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f32_fneg: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f32_e64 v0, -v0 +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e64 v0, -v0 +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg float %x %roundeven = call float @llvm.roundeven.f32(float %neg.x) ret float %roundeven @@ -587,6 +1073,47 @@ define double @v_roundeven_f64(double %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f64: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: s_brev_b32 s4, -2 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0x43300000 +; SDAG_GFX6-NEXT: v_bfi_b32 v3, 
s4, v2, v1 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0 +; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[0:1], v[2:3] +; SDAG_GFX6-NEXT: s_mov_b32 s4, -1 +; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff +; SDAG_GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3] +; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5] +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f64: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f64: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f64: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_f64: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call double @llvm.roundeven.f64(double %x) ret double %roundeven } @@ -632,6 +1159,48 @@ define double @v_roundeven_f64_fneg(double %x) { ; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_f64_fneg: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v1 +; SDAG_GFX6-NEXT: s_brev_b32 s4, -2 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0x43300000 +; SDAG_GFX6-NEXT: v_bfi_b32 v3, s4, v2, v6 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v2, 0 +; SDAG_GFX6-NEXT: v_add_f64 v[4:5], -v[0:1], v[2:3] +; SDAG_GFX6-NEXT: s_mov_b32 s4, -1 +; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff +; SDAG_GFX6-NEXT: v_add_f64 v[2:3], v[4:5], -v[2:3] +; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5] +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_f64_fneg: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_f64_fneg: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_f64_fneg: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e64 v[0:1], -v[0:1] +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg double %x %roundeven = call double @llvm.roundeven.f64(double %neg.x) ret double %roundeven @@ -688,6 +1257,57 @@ define <2 x double> @v_roundeven_v2f64(<2 x double> %x) { ; GFX10PLUS-NEXT: v_rndne_f64_e32 
v[0:1], v[0:1] ; GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX6-LABEL: v_roundeven_v2f64: +; SDAG_GFX6: ; %bb.0: +; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX6-NEXT: s_brev_b32 s6, -2 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v8, 0x43300000 +; SDAG_GFX6-NEXT: v_bfi_b32 v5, s6, v8, v1 +; SDAG_GFX6-NEXT: v_mov_b32_e32 v4, 0 +; SDAG_GFX6-NEXT: v_add_f64 v[6:7], v[0:1], v[4:5] +; SDAG_GFX6-NEXT: s_mov_b32 s4, -1 +; SDAG_GFX6-NEXT: s_mov_b32 s5, 0x432fffff +; SDAG_GFX6-NEXT: v_add_f64 v[5:6], v[6:7], -v[4:5] +; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5] +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; SDAG_GFX6-NEXT: v_bfi_b32 v5, s6, v8, v3 +; SDAG_GFX6-NEXT: v_add_f64 v[7:8], v[2:3], v[4:5] +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; SDAG_GFX6-NEXT: v_add_f64 v[4:5], v[7:8], -v[4:5] +; SDAG_GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5] +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; SDAG_GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX7-LABEL: v_roundeven_v2f64: +; SDAG_GFX7: ; %bb.0: +; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX7-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] +; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX8-LABEL: v_roundeven_v2f64: +; SDAG_GFX8: ; %bb.0: +; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX8-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] +; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX9-LABEL: v_roundeven_v2f64: +; SDAG_GFX9: ; %bb.0: +; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX9-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] +; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64: +; SDAG_GFX10PLUS: ; %bb.0: +; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG_GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[0:1], v[0:1] +; SDAG_GFX10PLUS-NEXT: v_rndne_f64_e32 v[2:3], v[2:3] +; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x) ret <2 x double> %roundeven } diff --git a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir index f2d9aefa66731..5d684a80ba373 100644 --- a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir +++ b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir @@ -23,13 +23,13 @@ body: | ; GCN-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY3]], [[COPY4]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167 ; GCN-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], [[COPY2]], implicit $exec - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]] - ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_e64_1]], [[COPY6]], [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[S_MOV_B32_]] + ; GCN-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_e64_1]], [[PRED_COPY]], [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GCN-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY3]], [[V_ADDC_U32_e64_]], implicit 
$exec ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736 ; GCN-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[V_MUL_HI_U32_e64_]], [[S_MOV_B32_1]], implicit $exec - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]] - ; GCN-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY7]], killed [[V_MUL_LO_U32_e64_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[S_MOV_B32_1]] + ; GCN-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[PRED_COPY1]], killed [[V_MUL_LO_U32_e64_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec %0:vgpr_32 = COPY $vgpr0 %6:sreg_32 = COPY %0 %1:vgpr_32 = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index 94c946321499e..16c5be25b7902 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -259,7 +259,7 @@ define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) { ; GCN-DAG: s_and_b32 ; GCN-DAG: s_sub_i32 ; GCN-DAG: s_lshr_b32 -; GCN: s_add_i32 +; GCN: v_add_i32_e32 define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) { %icmp0 = icmp ugt i8 %a, %b %sub0 = sub i8 %a, %b @@ -275,8 +275,8 @@ define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext % ; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1: ; GCN-DAG: s_cmp_le_u32 s{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b @@ -294,7 +294,7 @@ define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* ; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2: ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %d diff --git a/llvm/test/CodeGen/AMDGPU/save-fp.ll b/llvm/test/CodeGen/AMDGPU/save-fp.ll index ec56f41aa1a0a..ccc7e57cb56b2 100644 --- a/llvm/test/CodeGen/AMDGPU/save-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/save-fp.ll @@ -11,14 +11,14 @@ bb: ; GCN-LABEL: {{^}}caller: -; GCN: v_writelane_b32 v2, s33, 2 +; GCN: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33 ; GCN: s_mov_b32 s33, s32 ; GFX900: buffer_store_dword ; GFX908-DAG: v_accvgpr_write_b32 ; GCN: s_swappc_b64 ; GFX900: buffer_load_dword ; GFX908: v_accvgpr_read_b32 -; GCN: v_readlane_b32 s33, v2, 2 +; GCN: s_mov_b32 s33, [[TMP_SGPR]] define i64 @caller() { bb: call void asm sideeffect "", "~{v40}" () diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll index 959bc7f33426b..ff4cecc4b2e27 100644 --- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll +++ 
b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll @@ -1,22 +1,381 @@ -; RUN: not --crash llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -; This ends up needing to spill SGPRs to memory, and also does not -; have any free SGPRs available to save the exec mask when doing so. -; The register scavenger also needs to use the emergency stack slot, -; which tries to place the scavenged register restore instruction as -; far the block as possible, near the terminator. This places a -; restore instruction between the condition and the conditional -; branch, which gets expanded into a sequence involving s_not_b64 on -; the exec mask, clobbering SCC value before the branch. We probably -; have to stop relying on being able to flip and restore the exec -; mask, and always require a free SGPR for saving exec. +; This was a negative test to catch an extreme case when all options are exhausted +; while trying to spill SGPRs to memory. Now that SGPR spills into virtual VGPRs are +; enabled, the edge case no longer arises and the test always compiles. -; CHECK: *** Bad machine code: Using an undefined physical register *** -; CHECK-NEXT: - function: kernel0 -; CHECK-NEXT: - basic block: %bb.0 -; CHECK-NEXT: - instruction: S_CBRANCH_SCC1 %bb.2, implicit killed $scc -; CHECK-NEXT: - operand 1: implicit killed $scc define amdgpu_kernel void @kernel0(i32 addrspace(1)* %out, i32 %in) #1 { +; CHECK-LABEL: kernel0: +; CHECK: ; %bb.0: +; CHECK-NEXT: ; implicit-def: $vgpr23 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[2:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s2, 0 +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: v_writelane_b32 v23, s3, 1 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s4, 2 +; CHECK-NEXT: v_writelane_b32 v23, s5, 3 +; CHECK-NEXT: v_writelane_b32 v23, s6, 4 +; CHECK-NEXT: v_writelane_b32 v23, s7, 5 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[4:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s4, 6 +; CHECK-NEXT: v_writelane_b32 v23, s5, 7 +; CHECK-NEXT: v_writelane_b32 v23, s6, 8 +; CHECK-NEXT: v_writelane_b32 v23, s7, 9 +; CHECK-NEXT: v_writelane_b32 v23, s8, 10 +; CHECK-NEXT: v_writelane_b32 v23, s9, 11 +; CHECK-NEXT: v_writelane_b32 v23, s10, 12 +; CHECK-NEXT: v_writelane_b32 v23, s11, 13 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[4:19] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s4, 14 +; CHECK-NEXT: v_writelane_b32 v23, s5, 15 +; CHECK-NEXT: v_writelane_b32 v23, s6, 16 +; CHECK-NEXT: v_writelane_b32 v23, s7, 17 +; CHECK-NEXT: v_writelane_b32 v23, s8, 18 +; CHECK-NEXT: v_writelane_b32 v23, s9, 19 +; CHECK-NEXT: v_writelane_b32 v23, s10, 20 +; CHECK-NEXT: v_writelane_b32 v23, s11, 21 +; CHECK-NEXT: v_writelane_b32 v23, s12, 22 +; CHECK-NEXT: v_writelane_b32 v23, s13, 23 +; CHECK-NEXT: v_writelane_b32 v23, s14, 24 +; CHECK-NEXT: v_writelane_b32 v23, s15, 25 +; CHECK-NEXT: v_writelane_b32 v23, s16, 26 +; CHECK-NEXT: v_writelane_b32 v23, s17, 27 +; CHECK-NEXT: 
v_writelane_b32 v23, s18, 28 +; CHECK-NEXT: v_writelane_b32 v23, s19, 29 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[2:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s2, 30 +; CHECK-NEXT: v_writelane_b32 v23, s3, 31 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[4:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s4, 32 +; CHECK-NEXT: v_writelane_b32 v23, s5, 33 +; CHECK-NEXT: v_writelane_b32 v23, s6, 34 +; CHECK-NEXT: v_writelane_b32 v23, s7, 35 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[4:11] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s4, 36 +; CHECK-NEXT: v_writelane_b32 v23, s5, 37 +; CHECK-NEXT: v_writelane_b32 v23, s6, 38 +; CHECK-NEXT: v_writelane_b32 v23, s7, 39 +; CHECK-NEXT: v_writelane_b32 v23, s8, 40 +; CHECK-NEXT: v_writelane_b32 v23, s9, 41 +; CHECK-NEXT: v_writelane_b32 v23, s10, 42 +; CHECK-NEXT: v_writelane_b32 v23, s11, 43 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_lg_u32 s0, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[16:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[52:53] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[48:51] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[36:43] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s0, 44 +; CHECK-NEXT: v_writelane_b32 v23, s1, 45 +; CHECK-NEXT: v_writelane_b32 v23, s2, 46 +; CHECK-NEXT: v_writelane_b32 v23, s3, 47 +; CHECK-NEXT: v_writelane_b32 v23, s4, 48 +; CHECK-NEXT: v_writelane_b32 v23, s5, 49 +; CHECK-NEXT: v_writelane_b32 v23, s6, 50 +; CHECK-NEXT: v_writelane_b32 v23, s7, 51 +; CHECK-NEXT: v_writelane_b32 v23, s8, 52 +; CHECK-NEXT: v_writelane_b32 v23, s9, 53 +; CHECK-NEXT: v_writelane_b32 v23, s10, 54 +; CHECK-NEXT: v_writelane_b32 v23, s11, 55 +; CHECK-NEXT: v_writelane_b32 v23, s12, 56 +; CHECK-NEXT: v_writelane_b32 v23, s13, 57 +; CHECK-NEXT: v_writelane_b32 v23, s14, 58 +; CHECK-NEXT: ; implicit-def: $vgpr0 +; CHECK-NEXT: v_writelane_b32 v23, s15, 59 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[34:35] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[44:47] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v23, s0, 60 +; CHECK-NEXT: v_writelane_b32 v0, s4, 0 +; CHECK-NEXT: v_writelane_b32 v23, s1, 61 +; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: v_writelane_b32 v23, s2, 62 +; CHECK-NEXT: v_writelane_b32 v0, s6, 2 +; CHECK-NEXT: v_writelane_b32 v23, s3, 63 +; CHECK-NEXT: v_writelane_b32 v0, s7, 3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v0, s0, 4 +; CHECK-NEXT: v_writelane_b32 v0, s1, 5 +; CHECK-NEXT: v_writelane_b32 v0, s2, 6 +; CHECK-NEXT: v_writelane_b32 v0, s3, 7 +; CHECK-NEXT: v_writelane_b32 v0, s4, 8 +; CHECK-NEXT: v_writelane_b32 v0, s5, 9 +; CHECK-NEXT: v_writelane_b32 v0, s6, 10 +; CHECK-NEXT: v_writelane_b32 v0, s7, 11 +; CHECK-NEXT: v_writelane_b32 v0, s8, 12 +; CHECK-NEXT: v_writelane_b32 v0, s9, 13 +; CHECK-NEXT: v_writelane_b32 v0, s10, 14 +; CHECK-NEXT: v_writelane_b32 v0, s11, 15 +; CHECK-NEXT: v_writelane_b32 v0, s12, 16 +; CHECK-NEXT: v_writelane_b32 v0, s13, 17 +; CHECK-NEXT: v_writelane_b32 v0, s14, 18 +; CHECK-NEXT: v_writelane_b32 v0, s15, 19 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[54:55] +; CHECK-NEXT: ;;#ASMEND +; 
CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v0, s0, 20 +; CHECK-NEXT: v_writelane_b32 v0, s1, 21 +; CHECK-NEXT: v_writelane_b32 v0, s2, 22 +; CHECK-NEXT: v_writelane_b32 v0, s3, 23 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v0, s0, 24 +; CHECK-NEXT: v_writelane_b32 v0, s1, 25 +; CHECK-NEXT: v_writelane_b32 v0, s2, 26 +; CHECK-NEXT: v_writelane_b32 v0, s3, 27 +; CHECK-NEXT: v_writelane_b32 v0, s4, 28 +; CHECK-NEXT: v_writelane_b32 v0, s5, 29 +; CHECK-NEXT: v_writelane_b32 v0, s6, 30 +; CHECK-NEXT: v_writelane_b32 v0, s7, 31 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; def s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_writelane_b32 v0, s0, 32 +; CHECK-NEXT: v_writelane_b32 v0, s1, 33 +; CHECK-NEXT: v_writelane_b32 v0, s2, 34 +; CHECK-NEXT: v_writelane_b32 v0, s3, 35 +; CHECK-NEXT: v_writelane_b32 v0, s4, 36 +; CHECK-NEXT: v_writelane_b32 v0, s5, 37 +; CHECK-NEXT: v_writelane_b32 v0, s6, 38 +; CHECK-NEXT: v_writelane_b32 v0, s7, 39 +; CHECK-NEXT: v_writelane_b32 v0, s8, 40 +; CHECK-NEXT: v_writelane_b32 v0, s9, 41 +; CHECK-NEXT: v_writelane_b32 v0, s10, 42 +; CHECK-NEXT: v_writelane_b32 v0, s11, 43 +; CHECK-NEXT: v_writelane_b32 v0, s12, 44 +; CHECK-NEXT: v_writelane_b32 v0, s13, 45 +; CHECK-NEXT: v_writelane_b32 v0, s14, 46 +; CHECK-NEXT: v_writelane_b32 v0, s15, 47 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %ret +; CHECK-NEXT: ; kill: killed $vgpr23 +; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_2: ; %bb0 +; CHECK-NEXT: v_readlane_b32 s0, v23, 0 +; CHECK-NEXT: v_readlane_b32 s1, v23, 1 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 2 +; CHECK-NEXT: v_readlane_b32 s1, v23, 3 +; CHECK-NEXT: v_readlane_b32 s2, v23, 4 +; CHECK-NEXT: v_readlane_b32 s3, v23, 5 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 6 +; CHECK-NEXT: v_readlane_b32 s1, v23, 7 +; CHECK-NEXT: v_readlane_b32 s2, v23, 8 +; CHECK-NEXT: v_readlane_b32 s3, v23, 9 +; CHECK-NEXT: v_readlane_b32 s4, v23, 10 +; CHECK-NEXT: v_readlane_b32 s5, v23, 11 +; CHECK-NEXT: v_readlane_b32 s6, v23, 12 +; CHECK-NEXT: v_readlane_b32 s7, v23, 13 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 14 +; CHECK-NEXT: v_readlane_b32 s1, v23, 15 +; CHECK-NEXT: v_readlane_b32 s2, v23, 16 +; CHECK-NEXT: v_readlane_b32 s3, v23, 17 +; CHECK-NEXT: v_readlane_b32 s4, v23, 18 +; CHECK-NEXT: v_readlane_b32 s5, v23, 19 +; CHECK-NEXT: v_readlane_b32 s6, v23, 20 +; CHECK-NEXT: v_readlane_b32 s7, v23, 21 +; CHECK-NEXT: v_readlane_b32 s8, v23, 22 +; CHECK-NEXT: v_readlane_b32 s9, v23, 23 +; CHECK-NEXT: v_readlane_b32 s10, v23, 24 +; CHECK-NEXT: v_readlane_b32 s11, v23, 25 +; CHECK-NEXT: v_readlane_b32 s12, v23, 26 +; CHECK-NEXT: v_readlane_b32 s13, v23, 27 +; CHECK-NEXT: v_readlane_b32 s14, v23, 28 +; CHECK-NEXT: v_readlane_b32 s15, v23, 29 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 30 +; CHECK-NEXT: v_readlane_b32 s1, v23, 31 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:1] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 32 +; CHECK-NEXT: v_readlane_b32 s1, v23, 33 +; CHECK-NEXT: v_readlane_b32 s2, v23, 34 +; CHECK-NEXT: v_readlane_b32 s3, v23, 35 +; CHECK-NEXT: ;;#ASMSTART 
+; CHECK-NEXT: ; use s[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 36 +; CHECK-NEXT: v_readlane_b32 s1, v23, 37 +; CHECK-NEXT: v_readlane_b32 s2, v23, 38 +; CHECK-NEXT: v_readlane_b32 s3, v23, 39 +; CHECK-NEXT: v_readlane_b32 s4, v23, 40 +; CHECK-NEXT: v_readlane_b32 s5, v23, 41 +; CHECK-NEXT: v_readlane_b32 s6, v23, 42 +; CHECK-NEXT: v_readlane_b32 s7, v23, 43 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 44 +; CHECK-NEXT: v_readlane_b32 s1, v23, 45 +; CHECK-NEXT: v_readlane_b32 s2, v23, 46 +; CHECK-NEXT: v_readlane_b32 s3, v23, 47 +; CHECK-NEXT: v_readlane_b32 s4, v23, 48 +; CHECK-NEXT: v_readlane_b32 s5, v23, 49 +; CHECK-NEXT: v_readlane_b32 s6, v23, 50 +; CHECK-NEXT: v_readlane_b32 s7, v23, 51 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[16:31] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[52:53] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[48:51] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[36:43] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s8, v23, 52 +; CHECK-NEXT: v_readlane_b32 s9, v23, 53 +; CHECK-NEXT: v_readlane_b32 s10, v23, 54 +; CHECK-NEXT: v_readlane_b32 s11, v23, 55 +; CHECK-NEXT: v_readlane_b32 s12, v23, 56 +; CHECK-NEXT: v_readlane_b32 s13, v23, 57 +; CHECK-NEXT: v_readlane_b32 s14, v23, 58 +; CHECK-NEXT: v_readlane_b32 s15, v23, 59 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v23, 60 +; CHECK-NEXT: v_readlane_b32 s1, v23, 61 +; CHECK-NEXT: v_readlane_b32 s2, v23, 62 +; CHECK-NEXT: v_readlane_b32 s3, v23, 63 +; CHECK-NEXT: v_readlane_b32 s4, v0, 0 +; CHECK-NEXT: v_readlane_b32 s5, v0, 1 +; CHECK-NEXT: v_readlane_b32 s6, v0, 2 +; CHECK-NEXT: v_readlane_b32 s7, v0, 3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[34:35] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[44:47] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v0, 4 +; CHECK-NEXT: v_readlane_b32 s1, v0, 5 +; CHECK-NEXT: v_readlane_b32 s2, v0, 6 +; CHECK-NEXT: v_readlane_b32 s3, v0, 7 +; CHECK-NEXT: v_readlane_b32 s4, v0, 8 +; CHECK-NEXT: v_readlane_b32 s5, v0, 9 +; CHECK-NEXT: v_readlane_b32 s6, v0, 10 +; CHECK-NEXT: v_readlane_b32 s7, v0, 11 +; CHECK-NEXT: v_readlane_b32 s8, v0, 12 +; CHECK-NEXT: v_readlane_b32 s9, v0, 13 +; CHECK-NEXT: v_readlane_b32 s10, v0, 14 +; CHECK-NEXT: v_readlane_b32 s11, v0, 15 +; CHECK-NEXT: v_readlane_b32 s12, v0, 16 +; CHECK-NEXT: v_readlane_b32 s13, v0, 17 +; CHECK-NEXT: v_readlane_b32 s14, v0, 18 +; CHECK-NEXT: v_readlane_b32 s15, v0, 19 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v0, 20 +; CHECK-NEXT: v_readlane_b32 s1, v0, 21 +; CHECK-NEXT: v_readlane_b32 s2, v0, 22 +; CHECK-NEXT: v_readlane_b32 s3, v0, 23 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[54:55] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:3] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v0, 24 +; CHECK-NEXT: v_readlane_b32 s1, v0, 25 +; CHECK-NEXT: v_readlane_b32 s2, v0, 26 +; CHECK-NEXT: v_readlane_b32 s3, v0, 27 +; CHECK-NEXT: v_readlane_b32 s4, v0, 28 +; CHECK-NEXT: v_readlane_b32 s5, v0, 29 +; CHECK-NEXT: v_readlane_b32 s6, v0, 30 +; CHECK-NEXT: v_readlane_b32 s7, v0, 31 +; CHECK-NEXT: ;;#ASMSTART +; 
CHECK-NEXT: ; use s[0:7] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s0, v0, 32 +; CHECK-NEXT: v_readlane_b32 s1, v0, 33 +; CHECK-NEXT: v_readlane_b32 s2, v0, 34 +; CHECK-NEXT: v_readlane_b32 s3, v0, 35 +; CHECK-NEXT: v_readlane_b32 s4, v0, 36 +; CHECK-NEXT: v_readlane_b32 s5, v0, 37 +; CHECK-NEXT: v_readlane_b32 s6, v0, 38 +; CHECK-NEXT: v_readlane_b32 s7, v0, 39 +; CHECK-NEXT: v_readlane_b32 s8, v0, 40 +; CHECK-NEXT: v_readlane_b32 s9, v0, 41 +; CHECK-NEXT: v_readlane_b32 s10, v0, 42 +; CHECK-NEXT: v_readlane_b32 s11, v0, 43 +; CHECK-NEXT: v_readlane_b32 s12, v0, 44 +; CHECK-NEXT: v_readlane_b32 s13, v0, 45 +; CHECK-NEXT: v_readlane_b32 s14, v0, 46 +; CHECK-NEXT: v_readlane_b32 s15, v0, 47 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; use s[0:15] +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ; kill: killed $vgpr23 +; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 call void asm sideeffect "", "~{v[16:19]}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll index 87b2d29c8b0fb..ff3371e4d5551 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll @@ -6,8 +6,8 @@ ; SI-MINREG: NumSgprs: {{[1-9]$}} ; SI-MINREG: NumVgprs: {{[1-9]$}} -; SI-MAXOCC: NumSgprs: {{[1-4]?[0-9]$}} -; SI-MAXOCC: NumVgprs: {{[1-4]?[0-9]$}} +; SI-MAXOCC: NumSgprs: {{[0-4][0-9]$}} +; SI-MAXOCC: NumVgprs: {{[0-4][0-9]$}} ; stores may alias loads ; VI: NumSgprs: {{[0-9]$}} diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll index 465acccd3d4d9..5df7470f65f2c 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll @@ -46,11 +46,11 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; GCN-NEXT: v_mul_hi_u32 v3, v0, v3 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v1 ; GCN-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; GCN-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GCN-NEXT: v_subrev_i32_e32 v0, vcc, v4, v0 +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] ; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v1, v0 -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] ; GCN-NEXT: v_add_i32_e32 v4, vcc, 1, v3 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc @@ -91,11 +91,11 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; TONGA-NEXT: v_mul_hi_u32 v3, v0, v3 ; TONGA-NEXT: v_mul_lo_u32 v4, v3, v1 ; TONGA-NEXT: v_add_u32_e32 v5, vcc, 1, v3 -; TONGA-NEXT: v_sub_u32_e32 v0, vcc, v0, v4 +; TONGA-NEXT: v_subrev_u32_e32 v0, vcc, v4, v0 +; TONGA-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v1 +; TONGA-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] ; TONGA-NEXT: v_subrev_u32_e32 v4, vcc, v1, v0 -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 -; TONGA-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; TONGA-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; TONGA-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] ; TONGA-NEXT: v_add_u32_e32 v4, vcc, 1, v3 ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 ; TONGA-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc @@ -118,38 +118,34 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; GFX9-NEXT: s_mov_b32 s4, s0 ; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-NEXT: v_readfirstlane_b32 s2, v1 -; GFX9-NEXT: s_ashr_i32 s3, s2, 31 -; GFX9-NEXT: s_add_i32 s2, s2, s3 -; GFX9-NEXT: s_xor_b32 s2, s2, s3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 -; GFX9-NEXT: s_add_i32 s0, s0, s1 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX9-NEXT: s_xor_b32 s3, s1, s3 -; GFX9-NEXT: s_xor_b32 s0, s0, s1 -; GFX9-NEXT: s_sub_i32 s1, 0, s2 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s1, s1, s8 -; GFX9-NEXT: s_mul_hi_u32 s1, s8, s1 -; GFX9-NEXT: s_add_i32 s8, s8, s1 -; GFX9-NEXT: s_mul_hi_u32 s1, s0, s8 -; GFX9-NEXT: s_mul_i32 s8, s1, s2 -; GFX9-NEXT: s_sub_i32 s0, s0, s8 -; GFX9-NEXT: s_add_i32 s9, s1, 1 -; GFX9-NEXT: s_sub_i32 s8, s0, s2 -; GFX9-NEXT: s_cmp_ge_u32 s0, s2 -; GFX9-NEXT: s_cselect_b32 s1, s9, s1 -; GFX9-NEXT: s_cselect_b32 s0, s8, s0 -; GFX9-NEXT: s_add_i32 s8, s1, 1 -; GFX9-NEXT: s_cmp_ge_u32 s0, s2 -; GFX9-NEXT: s_cselect_b32 s0, s8, s1 -; GFX9-NEXT: s_xor_b32 s0, s0, s3 -; GFX9-NEXT: s_sub_i32 s0, s0, s3 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GFX9-NEXT: v_sub_u32_e32 v4, 0, v1 +; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v5 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v5 +; GFX9-NEXT: v_xor_b32_e32 v2, v5, v2 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 +; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v3, v1 +; GFX9-NEXT: v_add_u32_e32 v5, 1, v3 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v4 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GFX9-NEXT: v_sub_u32_e32 v4, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v3 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v2 ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; @@ -312,7 +308,7 @@ define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrsp ; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mul_hi_i32 v1, v0, s2 -; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 31, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v0, 11, v0 ; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 @@ -335,7 +331,7 @@ define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrsp ; TONGA-NEXT: s_mov_b32 s5, s1 ; TONGA-NEXT: s_waitcnt vmcnt(0) ; TONGA-NEXT: v_mul_hi_i32 v1, v0, s2 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v1, v0 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; TONGA-NEXT: v_lshrrev_b32_e32 v1, 31, v0 ; TONGA-NEXT: v_ashrrev_i32_e32 v0, 11, v0 ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v1, v0 @@ -436,22 +432,22 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; GCN-NEXT: v_mul_hi_u32 v4, v5, v10 ; GCN-NEXT: v_xor_b32_e32 v1, v1, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v7, v11 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GCN-NEXT: 
v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v6 ; GCN-NEXT: v_mul_hi_u32 v4, v0, v4 ; GCN-NEXT: v_mul_hi_u32 v5, v1, v5 ; GCN-NEXT: v_mul_lo_u32 v6, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v10, v5, v3 ; GCN-NEXT: v_add_i32_e32 v7, vcc, 1, v4 ; GCN-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 -; GCN-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 +; GCN-NEXT: v_subrev_i32_e32 v1, vcc, v10, v1 ; GCN-NEXT: v_add_i32_e32 v11, vcc, 1, v5 ; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v2 ; GCN-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v3 -; GCN-NEXT: v_subrev_i32_e32 v6, vcc, v2, v0 ; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; GCN-NEXT: v_subrev_i32_e32 v7, vcc, v3, v1 +; GCN-NEXT: v_subrev_i32_e32 v6, vcc, v2, v0 ; GCN-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[2:3] +; GCN-NEXT: v_subrev_i32_e32 v7, vcc, v3, v1 ; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; GCN-NEXT: v_add_i32_e32 v6, vcc, 1, v4 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] @@ -462,7 +458,7 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GCN-NEXT: v_xor_b32_e32 v0, v0, v8 ; GCN-NEXT: v_xor_b32_e32 v1, v1, v9 -; GCN-NEXT: v_subrev_i32_e32 v0, vcc, v8, v0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; GCN-NEXT: v_subrev_i32_e32 v1, vcc, v9, v1 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm @@ -509,8 +505,8 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; TONGA-NEXT: v_mul_hi_u32 v4, v5, v10 ; TONGA-NEXT: v_xor_b32_e32 v1, v1, v6 ; TONGA-NEXT: v_mul_hi_u32 v6, v7, v11 -; TONGA-NEXT: v_add_u32_e32 v4, vcc, v5, v4 -; TONGA-NEXT: v_add_u32_e32 v5, vcc, v6, v7 +; TONGA-NEXT: v_add_u32_e32 v4, vcc, v4, v5 +; TONGA-NEXT: v_add_u32_e32 v5, vcc, v7, v6 ; TONGA-NEXT: v_mul_hi_u32 v4, v0, v4 ; TONGA-NEXT: v_mul_hi_u32 v5, v1, v5 ; TONGA-NEXT: v_mul_lo_u32 v6, v4, v2 @@ -521,10 +517,10 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; TONGA-NEXT: v_add_u32_e32 v11, vcc, 1, v5 ; TONGA-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v2 ; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v3 -; TONGA-NEXT: v_subrev_u32_e32 v6, vcc, v2, v0 ; TONGA-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; TONGA-NEXT: v_subrev_u32_e32 v7, vcc, v3, v1 +; TONGA-NEXT: v_subrev_u32_e32 v6, vcc, v2, v0 ; TONGA-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[2:3] +; TONGA-NEXT: v_subrev_u32_e32 v7, vcc, v3, v1 ; TONGA-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; TONGA-NEXT: v_add_u32_e32 v6, vcc, 1, v4 ; TONGA-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] @@ -542,83 +538,75 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; ; GFX9-LABEL: sdiv_v2i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 -; GFX9-NEXT: s_mov_b32 s11, s3 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 +; GFX9-NEXT: s_mov_b32 s10, s6 +; GFX9-NEXT: s_mov_b32 s11, s7 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 +; GFX9-NEXT: s_mov_b32 s8, s2 +; GFX9-NEXT: s_mov_b32 s9, s3 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GFX9-NEXT: s_mov_b32 s4, s0 +; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_readfirstlane_b32 s0, v2 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 -; GFX9-NEXT: s_add_i32 s0, s0, s1 -; GFX9-NEXT: s_xor_b32 s6, s0, s1 -; GFX9-NEXT: 
v_cvt_f32_u32_e32 v2, s6 -; GFX9-NEXT: v_readfirstlane_b32 s7, v0 -; GFX9-NEXT: s_ashr_i32 s8, s7, 31 -; GFX9-NEXT: s_add_i32 s7, s7, s8 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX9-NEXT: s_xor_b32 s9, s8, s1 -; GFX9-NEXT: s_xor_b32 s1, s7, s8 -; GFX9-NEXT: s_sub_i32 s7, 0, s6 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v2 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: v_readfirstlane_b32 s4, v3 -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s7, s7, s8 -; GFX9-NEXT: s_mul_hi_u32 s7, s8, s7 -; GFX9-NEXT: s_add_i32 s8, s8, s7 -; GFX9-NEXT: s_mul_hi_u32 s7, s1, s8 -; GFX9-NEXT: s_mul_i32 s8, s7, s6 -; GFX9-NEXT: s_sub_i32 s1, s1, s8 -; GFX9-NEXT: s_add_i32 s10, s7, 1 -; GFX9-NEXT: s_sub_i32 s8, s1, s6 -; GFX9-NEXT: s_cmp_ge_u32 s1, s6 -; GFX9-NEXT: s_cselect_b32 s7, s10, s7 -; GFX9-NEXT: s_cselect_b32 s1, s8, s1 -; GFX9-NEXT: s_add_i32 s8, s7, 1 -; GFX9-NEXT: s_cmp_ge_u32 s1, s6 -; GFX9-NEXT: s_cselect_b32 s6, s8, s7 -; GFX9-NEXT: s_ashr_i32 s7, s4, 31 -; GFX9-NEXT: s_add_i32 s4, s4, s7 -; GFX9-NEXT: s_xor_b32 s4, s4, s7 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 -; GFX9-NEXT: s_ashr_i32 s8, s5, 31 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s6, s6, s9 -; GFX9-NEXT: s_add_i32 s5, s5, s8 -; GFX9-NEXT: s_xor_b32 s7, s8, s7 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s6, s6, s9 -; GFX9-NEXT: s_xor_b32 s5, s5, s8 -; GFX9-NEXT: s_sub_i32 s8, 0, s4 -; GFX9-NEXT: v_readfirstlane_b32 s9, v0 -; GFX9-NEXT: s_mul_i32 s8, s8, s9 -; GFX9-NEXT: s_mul_hi_u32 s8, s9, s8 -; GFX9-NEXT: s_add_i32 s9, s9, s8 -; GFX9-NEXT: s_mul_hi_u32 s8, s5, s9 -; GFX9-NEXT: s_mul_i32 s9, s8, s4 -; GFX9-NEXT: s_sub_i32 s5, s5, s9 -; GFX9-NEXT: s_add_i32 s10, s8, 1 -; GFX9-NEXT: s_sub_i32 s9, s5, s4 -; GFX9-NEXT: s_cmp_ge_u32 s5, s4 -; GFX9-NEXT: s_cselect_b32 s8, s10, s8 -; GFX9-NEXT: s_cselect_b32 s5, s9, s5 -; GFX9-NEXT: s_add_i32 s9, s8, 1 -; GFX9-NEXT: s_cmp_ge_u32 s5, s4 -; GFX9-NEXT: s_cselect_b32 s4, s9, s8 -; GFX9-NEXT: s_xor_b32 s4, s4, s7 -; GFX9-NEXT: s_sub_i32 s4, s4, s7 -; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v2 +; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v5 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v4 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 +; GFX9-NEXT: v_cvt_f32_u32_e32 v6, v2 +; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GFX9-NEXT: v_sub_u32_e32 v10, 0, v2 +; GFX9-NEXT: v_sub_u32_e32 v11, 0, v3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v0 +; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v1 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v8 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v9 +; GFX9-NEXT: v_mul_lo_u32 v10, v10, v6 +; GFX9-NEXT: v_mul_lo_u32 v11, v11, v7 +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v9 +; GFX9-NEXT: v_mul_hi_u32 v10, v6, v10 +; GFX9-NEXT: v_mul_hi_u32 v11, v7, v11 +; GFX9-NEXT: v_xor_b32_e32 v4, v8, v4 +; GFX9-NEXT: v_xor_b32_e32 v5, v9, v5 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v10 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v11 +; GFX9-NEXT: 
v_mul_hi_u32 v6, v0, v6 +; GFX9-NEXT: v_mul_hi_u32 v7, v1, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v2 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v3 +; GFX9-NEXT: v_add_u32_e32 v10, 1, v6 +; GFX9-NEXT: v_add_u32_e32 v11, 1, v7 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v9 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GFX9-NEXT: v_sub_u32_e32 v8, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GFX9-NEXT: v_cmp_ge_u32_e64 s[0:1], v1, v3 +; GFX9-NEXT: v_sub_u32_e32 v9, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[0:1] +; GFX9-NEXT: v_add_u32_e32 v8, 1, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[0:1] +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GFX9-NEXT: v_add_u32_e32 v9, 1, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v4 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v5 +; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; ; EG-LABEL: sdiv_v2i32: @@ -846,7 +834,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_xor_b32_e32 v1, v1, v10 ; GCN-NEXT: v_cvt_f32_u32_e32 v10, v5 ; GCN-NEXT: v_cvt_f32_u32_e32 v11, v6 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; GCN-NEXT: v_rcp_iflag_f32_e32 v10, v10 ; GCN-NEXT: v_rcp_iflag_f32_e32 v11, v11 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v8 @@ -877,19 +865,19 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_cvt_f32_u32_e32 v12, v4 ; GCN-NEXT: v_mul_hi_u32 v0, v10, v0 ; GCN-NEXT: v_add_i32_e32 v11, vcc, 1, v8 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v9, v7 ; GCN-NEXT: v_mul_hi_u32 v7, v1, v7 -; GCN-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v10 ; GCN-NEXT: v_mul_hi_u32 v0, v2, v0 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v5 ; GCN-NEXT: v_rcp_iflag_f32_e32 v12, v12 ; GCN-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 -; GCN-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 +; GCN-NEXT: v_subrev_i32_e32 v1, vcc, v10, v1 ; GCN-NEXT: v_mul_lo_u32 v10, v0, v6 ; GCN-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v5 ; GCN-NEXT: v_mul_f32_e32 v12, 0x4f7ffffe, v12 ; GCN-NEXT: v_cvt_u32_f32_e32 v12, v12 -; GCN-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GCN-NEXT: v_subrev_i32_e32 v2, vcc, v10, v2 ; GCN-NEXT: v_add_i32_e32 v10, vcc, 1, v7 ; GCN-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[2:3] ; GCN-NEXT: v_add_i32_e32 v10, vcc, 1, v0 @@ -905,7 +893,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc ; GCN-NEXT: v_xor_b32_e32 v1, v8, v15 ; GCN-NEXT: v_xor_b32_e32 v5, v0, v16 -; GCN-NEXT: v_subrev_i32_e32 v0, vcc, v15, v1 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, v1, v15 ; GCN-NEXT: v_subrev_i32_e32 v1, vcc, v16, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v9, v12 ; GCN-NEXT: v_ashrrev_i32_e32 v8, 31, v3 @@ -921,12 +909,12 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_mul_lo_u32 v6, v5, v4 ; GCN-NEXT: v_subrev_i32_e32 v2, vcc, v17, v2 ; GCN-NEXT: v_xor_b32_e32 v7, v8, v14 -; GCN-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 +; GCN-NEXT: v_subrev_i32_e32 v3, vcc, v6, v3 ; GCN-NEXT: v_add_i32_e32 v6, vcc, 1, v5 -; GCN-NEXT: v_subrev_i32_e32 v8, vcc, v4, v3 -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; 
GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v3, v4 +; GCN-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[0:1] +; GCN-NEXT: v_subrev_i32_e32 v6, vcc, v4, v3 +; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] ; GCN-NEXT: v_add_i32_e32 v6, vcc, 1, v5 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 ; GCN-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc @@ -977,7 +965,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; TONGA-NEXT: v_xor_b32_e32 v1, v1, v10 ; TONGA-NEXT: v_cvt_f32_u32_e32 v10, v5 ; TONGA-NEXT: v_cvt_f32_u32_e32 v11, v6 -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v9 +; TONGA-NEXT: v_add_u32_e32 v8, vcc, v9, v8 ; TONGA-NEXT: v_rcp_iflag_f32_e32 v10, v10 ; TONGA-NEXT: v_rcp_iflag_f32_e32 v11, v11 ; TONGA-NEXT: v_mul_hi_u32 v8, v0, v8 @@ -1008,9 +996,9 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; TONGA-NEXT: v_cvt_f32_u32_e32 v12, v4 ; TONGA-NEXT: v_mul_hi_u32 v0, v10, v0 ; TONGA-NEXT: v_add_u32_e32 v11, vcc, 1, v8 -; TONGA-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; TONGA-NEXT: v_add_u32_e32 v7, vcc, v9, v7 ; TONGA-NEXT: v_mul_hi_u32 v7, v1, v7 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v10, v0 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v10 ; TONGA-NEXT: v_mul_hi_u32 v0, v2, v0 ; TONGA-NEXT: v_mul_lo_u32 v10, v7, v5 ; TONGA-NEXT: v_rcp_iflag_f32_e32 v12, v12 @@ -1020,7 +1008,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v5 ; TONGA-NEXT: v_mul_f32_e32 v12, 0x4f7ffffe, v12 ; TONGA-NEXT: v_cvt_u32_f32_e32 v12, v12 -; TONGA-NEXT: v_sub_u32_e32 v2, vcc, v2, v10 +; TONGA-NEXT: v_subrev_u32_e32 v2, vcc, v10, v2 ; TONGA-NEXT: v_add_u32_e32 v10, vcc, 1, v7 ; TONGA-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[2:3] ; TONGA-NEXT: v_add_u32_e32 v10, vcc, 1, v0 @@ -1052,165 +1040,149 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; TONGA-NEXT: v_mul_lo_u32 v6, v5, v4 ; TONGA-NEXT: v_subrev_u32_e32 v2, vcc, v17, v2 ; TONGA-NEXT: v_xor_b32_e32 v7, v8, v14 -; TONGA-NEXT: v_sub_u32_e32 v3, vcc, v3, v6 +; TONGA-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 ; TONGA-NEXT: v_add_u32_e32 v6, vcc, 1, v5 -; TONGA-NEXT: v_subrev_u32_e32 v8, vcc, v4, v3 -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 -; TONGA-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc -; TONGA-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; TONGA-NEXT: v_cmp_ge_u32_e64 s[0:1], v3, v4 +; TONGA-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[0:1] +; TONGA-NEXT: v_subrev_u32_e32 v6, vcc, v4, v3 +; TONGA-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] ; TONGA-NEXT: v_add_u32_e32 v6, vcc, 1, v5 ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 ; TONGA-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; TONGA-NEXT: v_xor_b32_e32 v3, v3, v7 -; TONGA-NEXT: v_subrev_u32_e32 v3, vcc, v7, v3 +; TONGA-NEXT: v_sub_u32_e32 v3, vcc, v3, v7 ; TONGA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; TONGA-NEXT: s_endpgm ; ; GFX9-LABEL: sdiv_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 -; GFX9-NEXT: s_mov_b32 s11, s3 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_mov_b32 s11, 0xf000 +; GFX9-NEXT: s_mov_b32 s10, -1 +; GFX9-NEXT: s_mov_b32 s6, s10 +; GFX9-NEXT: s_mov_b32 s7, s11 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 
offset:16 -; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 -; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GFX9-NEXT: s_mov_b32 s8, s0 +; GFX9-NEXT: s_mov_b32 s9, s1 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_readfirstlane_b32 s1, v0 -; GFX9-NEXT: s_ashr_i32 s4, s1, 31 -; GFX9-NEXT: s_add_i32 s1, s1, s4 -; GFX9-NEXT: s_xor_b32 s6, s1, s4 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_readfirstlane_b32 s8, v4 -; GFX9-NEXT: s_ashr_i32 s9, s8, 31 -; GFX9-NEXT: s_add_i32 s8, s8, s9 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s4, s9, s4 -; GFX9-NEXT: s_xor_b32 s8, s8, s9 -; GFX9-NEXT: s_sub_i32 s9, 0, s6 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s7, v1 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: v_readfirstlane_b32 s5, v3 -; GFX9-NEXT: v_readfirstlane_b32 s10, v0 -; GFX9-NEXT: s_mul_i32 s9, s9, s10 -; GFX9-NEXT: s_mul_hi_u32 s9, s10, s9 -; GFX9-NEXT: s_add_i32 s10, s10, s9 -; GFX9-NEXT: s_mul_hi_u32 s9, s8, s10 -; GFX9-NEXT: s_mul_i32 s10, s9, s6 -; GFX9-NEXT: s_sub_i32 s8, s8, s10 -; GFX9-NEXT: s_add_i32 s11, s9, 1 -; GFX9-NEXT: s_sub_i32 s10, s8, s6 -; GFX9-NEXT: s_cmp_ge_u32 s8, s6 -; GFX9-NEXT: s_cselect_b32 s9, s11, s9 -; GFX9-NEXT: s_cselect_b32 s8, s10, s8 -; GFX9-NEXT: s_add_i32 s10, s9, 1 -; GFX9-NEXT: s_cmp_ge_u32 s8, s6 -; GFX9-NEXT: s_cselect_b32 s6, s10, s9 -; GFX9-NEXT: s_ashr_i32 s8, s7, 31 -; GFX9-NEXT: s_add_i32 s7, s7, s8 -; GFX9-NEXT: s_xor_b32 s7, s7, s8 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX9-NEXT: v_readfirstlane_b32 s10, v5 -; GFX9-NEXT: s_ashr_i32 s11, s10, 31 -; GFX9-NEXT: s_xor_b32 s6, s6, s4 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_add_i32 s10, s10, s11 -; GFX9-NEXT: s_xor_b32 s8, s11, s8 -; GFX9-NEXT: s_sub_i32 s4, s6, s4 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s6, s10, s11 -; GFX9-NEXT: s_sub_i32 s10, 0, s7 -; GFX9-NEXT: v_readfirstlane_b32 s9, v2 -; GFX9-NEXT: v_readfirstlane_b32 s11, v0 -; GFX9-NEXT: s_mul_i32 s10, s10, s11 -; GFX9-NEXT: s_mul_hi_u32 s10, s11, s10 -; GFX9-NEXT: s_add_i32 s11, s11, s10 -; GFX9-NEXT: s_mul_hi_u32 s10, s6, s11 -; GFX9-NEXT: s_mul_i32 s11, s10, s7 -; GFX9-NEXT: s_sub_i32 s6, s6, s11 -; GFX9-NEXT: s_add_i32 s12, s10, 1 -; GFX9-NEXT: s_sub_i32 s11, s6, s7 -; GFX9-NEXT: s_cmp_ge_u32 s6, s7 -; GFX9-NEXT: s_cselect_b32 s10, s12, s10 -; GFX9-NEXT: s_cselect_b32 s6, s11, s6 -; GFX9-NEXT: s_add_i32 s11, s10, 1 -; GFX9-NEXT: s_cmp_ge_u32 s6, s7 -; GFX9-NEXT: s_cselect_b32 s6, s11, s10 -; GFX9-NEXT: s_ashr_i32 s7, s9, 31 -; GFX9-NEXT: s_add_i32 s9, s9, s7 -; GFX9-NEXT: s_xor_b32 s9, s9, s7 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 -; GFX9-NEXT: v_readfirstlane_b32 s11, v6 -; GFX9-NEXT: s_ashr_i32 s12, s11, 31 -; GFX9-NEXT: s_xor_b32 s6, s6, s8 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_add_i32 s11, s11, s12 -; GFX9-NEXT: s_xor_b32 s7, s12, s7 -; GFX9-NEXT: s_sub_i32 s6, s6, s8 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s8, s11, s12 -; GFX9-NEXT: s_sub_i32 s11, 0, s9 -; GFX9-NEXT: v_readfirstlane_b32 s10, v7 -; GFX9-NEXT: v_readfirstlane_b32 s12, v0 -; GFX9-NEXT: s_mul_i32 s11, s11, s12 -; GFX9-NEXT: s_mul_hi_u32 
s11, s12, s11 -; GFX9-NEXT: s_add_i32 s12, s12, s11 -; GFX9-NEXT: s_mul_hi_u32 s11, s8, s12 -; GFX9-NEXT: s_mul_i32 s12, s11, s9 -; GFX9-NEXT: s_sub_i32 s8, s8, s12 -; GFX9-NEXT: s_add_i32 s13, s11, 1 -; GFX9-NEXT: s_sub_i32 s12, s8, s9 -; GFX9-NEXT: s_cmp_ge_u32 s8, s9 -; GFX9-NEXT: s_cselect_b32 s11, s13, s11 -; GFX9-NEXT: s_cselect_b32 s8, s12, s8 -; GFX9-NEXT: s_add_i32 s12, s11, 1 -; GFX9-NEXT: s_cmp_ge_u32 s8, s9 -; GFX9-NEXT: s_cselect_b32 s8, s12, s11 -; GFX9-NEXT: s_ashr_i32 s9, s5, 31 -; GFX9-NEXT: s_add_i32 s5, s5, s9 -; GFX9-NEXT: s_xor_b32 s5, s5, s9 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s5 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_ashr_i32 s4, s10, 31 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX9-NEXT: s_xor_b32 s6, s8, s7 -; GFX9-NEXT: s_xor_b32 s8, s4, s9 -; GFX9-NEXT: s_sub_i32 s6, s6, s7 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX9-NEXT: s_sub_i32 s7, 0, s5 -; GFX9-NEXT: s_add_i32 s10, s10, s4 -; GFX9-NEXT: s_xor_b32 s4, s10, s4 -; GFX9-NEXT: v_readfirstlane_b32 s9, v2 -; GFX9-NEXT: s_mul_i32 s7, s7, s9 -; GFX9-NEXT: s_mul_hi_u32 s7, s9, s7 -; GFX9-NEXT: s_add_i32 s9, s9, s7 -; GFX9-NEXT: s_mul_hi_u32 s7, s4, s9 -; GFX9-NEXT: s_mul_i32 s9, s7, s5 -; GFX9-NEXT: s_sub_i32 s4, s4, s9 -; GFX9-NEXT: s_add_i32 s10, s7, 1 -; GFX9-NEXT: s_sub_i32 s9, s4, s5 -; GFX9-NEXT: s_cmp_ge_u32 s4, s5 -; GFX9-NEXT: s_cselect_b32 s7, s10, s7 -; GFX9-NEXT: s_cselect_b32 s4, s9, s4 -; GFX9-NEXT: s_add_i32 s9, s7, 1 -; GFX9-NEXT: s_cmp_ge_u32 s4, s5 -; GFX9-NEXT: s_cselect_b32 s4, s9, s7 -; GFX9-NEXT: s_xor_b32 s4, s4, s8 -; GFX9-NEXT: s_sub_i32 s4, s4, s8 -; GFX9-NEXT: v_mov_b32_e32 v2, s6 -; GFX9-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v4 +; GFX9-NEXT: v_add_u32_e32 v4, v4, v9 +; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v5 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v8 +; GFX9-NEXT: v_xor_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GFX9-NEXT: v_ashrrev_i32_e32 v13, 31, v6 +; GFX9-NEXT: v_xor_b32_e32 v16, v8, v9 +; GFX9-NEXT: v_add_u32_e32 v5, v5, v11 +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v4 +; GFX9-NEXT: v_ashrrev_i32_e32 v12, 31, v2 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v7 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v10 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v13 +; GFX9-NEXT: v_xor_b32_e32 v5, v5, v11 +; GFX9-NEXT: v_ashrrev_i32_e32 v14, 31, v3 +; GFX9-NEXT: v_add_u32_e32 v2, v2, v12 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v15 +; GFX9-NEXT: v_xor_b32_e32 v17, v10, v11 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v10 +; GFX9-NEXT: v_xor_b32_e32 v6, v6, v13 +; GFX9-NEXT: v_cvt_f32_u32_e32 v10, v5 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v14 +; GFX9-NEXT: v_xor_b32_e32 v18, v12, v13 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v12 +; GFX9-NEXT: v_xor_b32_e32 v7, v7, v15 +; GFX9-NEXT: v_cvt_f32_u32_e32 v12, v6 +; GFX9-NEXT: v_xor_b32_e32 v19, v14, v15 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v14 +; GFX9-NEXT: v_cvt_f32_u32_e32 v14, v7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v10, v10 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v12, v12 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v14, v14 +; GFX9-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 +; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GFX9-NEXT: v_mul_f32_e32 v10, 0x4f7ffffe, v10 +; GFX9-NEXT: v_mul_f32_e32 v12, 0x4f7ffffe, v12 +; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GFX9-NEXT: v_sub_u32_e32 v9, 0, v4 +; GFX9-NEXT: v_mul_f32_e32 v14, 0x4f7ffffe, v14 +; 
GFX9-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GFX9-NEXT: v_cvt_u32_f32_e32 v14, v14 +; GFX9-NEXT: v_mul_lo_u32 v9, v9, v8 +; GFX9-NEXT: v_sub_u32_e32 v11, 0, v5 +; GFX9-NEXT: v_sub_u32_e32 v13, 0, v6 +; GFX9-NEXT: v_mul_lo_u32 v11, v11, v10 +; GFX9-NEXT: v_sub_u32_e32 v15, 0, v7 +; GFX9-NEXT: v_mul_lo_u32 v13, v13, v12 +; GFX9-NEXT: v_mul_lo_u32 v15, v15, v14 +; GFX9-NEXT: v_mul_hi_u32 v9, v8, v9 +; GFX9-NEXT: v_mul_hi_u32 v11, v10, v11 +; GFX9-NEXT: v_mul_hi_u32 v13, v12, v13 +; GFX9-NEXT: v_mul_hi_u32 v15, v14, v15 +; GFX9-NEXT: v_add_u32_e32 v8, v8, v9 +; GFX9-NEXT: v_mul_hi_u32 v8, v0, v8 +; GFX9-NEXT: v_add_u32_e32 v9, v10, v11 +; GFX9-NEXT: v_add_u32_e32 v10, v12, v13 +; GFX9-NEXT: v_mul_hi_u32 v9, v1, v9 +; GFX9-NEXT: v_add_u32_e32 v11, v14, v15 +; GFX9-NEXT: v_mul_hi_u32 v10, v2, v10 +; GFX9-NEXT: v_mul_hi_u32 v11, v3, v11 +; GFX9-NEXT: v_mul_lo_u32 v12, v8, v4 +; GFX9-NEXT: v_mul_lo_u32 v14, v9, v5 +; GFX9-NEXT: v_mul_lo_u32 v15, v10, v6 +; GFX9-NEXT: v_add_u32_e32 v13, 1, v8 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v12 +; GFX9-NEXT: v_mul_lo_u32 v12, v11, v7 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v14 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GFX9-NEXT: v_add_u32_e32 v14, 1, v9 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v15 +; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc +; GFX9-NEXT: v_sub_u32_e32 v13, v0, v4 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[0:1], v1, v5 +; GFX9-NEXT: v_add_u32_e32 v15, 1, v10 +; GFX9-NEXT: v_sub_u32_e32 v3, v3, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[0:1] +; GFX9-NEXT: v_sub_u32_e32 v14, v1, v5 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[2:3], v2, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc +; GFX9-NEXT: v_add_u32_e32 v12, 1, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v15, s[2:3] +; GFX9-NEXT: v_sub_u32_e32 v15, v2, v6 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7 +; GFX9-NEXT: v_add_u32_e32 v13, 1, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v14, s[0:1] +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[4:5] +; GFX9-NEXT: v_sub_u32_e32 v12, v3, v7 +; GFX9-NEXT: v_add_u32_e32 v14, 1, v9 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v15, s[2:3] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v13, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; GFX9-NEXT: v_add_u32_e32 v15, 1, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v12, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GFX9-NEXT: v_add_u32_e32 v12, 1, v11 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v15, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v16 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v17 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v18 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v19 +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v16 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v17 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v18 +; GFX9-NEXT: v_sub_u32_e32 v3, v3, v19 +; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; GFX9-NEXT: s_endpgm ; ; EG-LABEL: sdiv_v4i32: @@ -1852,7 +1824,7 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v1| ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm @@ -2029,11 +2001,11 @@ define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* ; 
GCN-NEXT: v_mul_hi_u32 v3, v5, v3 ; GCN-NEXT: v_mul_lo_u32 v1, v3, v2 ; GCN-NEXT: v_add_i32_e32 v4, vcc, 1, v3 -; GCN-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 -; GCN-NEXT: v_subrev_i32_e32 v5, vcc, v2, v1 -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GCN-NEXT: v_subrev_i32_e32 v1, vcc, v1, v5 +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v1, v2 +; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v2, v1 +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] ; GCN-NEXT: v_add_i32_e32 v4, vcc, 1, v3 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc @@ -2077,11 +2049,11 @@ define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* ; TONGA-NEXT: v_mul_hi_u32 v3, v5, v3 ; TONGA-NEXT: v_mul_lo_u32 v1, v3, v2 ; TONGA-NEXT: v_add_u32_e32 v4, vcc, 1, v3 -; TONGA-NEXT: v_sub_u32_e32 v1, vcc, v5, v1 -; TONGA-NEXT: v_subrev_u32_e32 v5, vcc, v2, v1 -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 -; TONGA-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; TONGA-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; TONGA-NEXT: v_subrev_u32_e32 v1, vcc, v1, v5 +; TONGA-NEXT: v_cmp_ge_u32_e64 s[0:1], v1, v2 +; TONGA-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; TONGA-NEXT: v_subrev_u32_e32 v4, vcc, v2, v1 +; TONGA-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] ; TONGA-NEXT: v_add_u32_e32 v4, vcc, 1, v3 ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 ; TONGA-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc @@ -2105,41 +2077,37 @@ define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* ; GFX9-NEXT: s_mov_b32 s4, s0 ; GFX9-NEXT: s_mov_b32 s5, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_readfirstlane_b32 s2, v1 -; GFX9-NEXT: s_bfe_i32 s3, s2, 0x190000 -; GFX9-NEXT: s_bfe_i32 s2, s2, 0x10018 -; GFX9-NEXT: s_add_i32 s3, s3, s2 -; GFX9-NEXT: s_xor_b32 s3, s3, s2 -; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_bfe_i32 s1, s0, 0x190000 -; GFX9-NEXT: s_bfe_i32 s0, s0, 0x10018 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v1 -; GFX9-NEXT: s_add_i32 s1, s1, s0 -; GFX9-NEXT: s_xor_b32 s2, s0, s2 -; GFX9-NEXT: s_xor_b32 s0, s1, s0 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s1, 0, s3 -; GFX9-NEXT: v_readfirstlane_b32 s8, v0 -; GFX9-NEXT: s_mul_i32 s1, s1, s8 -; GFX9-NEXT: s_mul_hi_u32 s1, s8, s1 -; GFX9-NEXT: s_add_i32 s8, s8, s1 -; GFX9-NEXT: s_mul_hi_u32 s1, s0, s8 -; GFX9-NEXT: s_mul_i32 s8, s1, s3 -; GFX9-NEXT: s_sub_i32 s0, s0, s8 -; GFX9-NEXT: s_add_i32 s9, s1, 1 -; GFX9-NEXT: s_sub_i32 s8, s0, s3 -; GFX9-NEXT: s_cmp_ge_u32 s0, s3 -; GFX9-NEXT: s_cselect_b32 s1, s9, s1 -; GFX9-NEXT: s_cselect_b32 s0, s8, s0 -; GFX9-NEXT: s_add_i32 s8, s1, 1 -; GFX9-NEXT: s_cmp_ge_u32 s0, s3 -; GFX9-NEXT: s_cselect_b32 s0, s8, s1 -; GFX9-NEXT: s_xor_b32 s0, s0, s2 -; GFX9-NEXT: s_sub_i32 s0, s0, s2 -; GFX9-NEXT: s_bfe_i32 s0, s0, 0x190000 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_bfe_i32 v2, v1, 0, 25 +; GFX9-NEXT: v_bfe_i32 v1, v1, 24, 1 +; GFX9-NEXT: v_add_u32_e32 v2, v2, v1 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v1 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v2 +; GFX9-NEXT: v_sub_u32_e32 v4, 0, v2 +; GFX9-NEXT: v_bfe_i32 v5, v0, 0, 25 +; GFX9-NEXT: v_bfe_i32 v0, v0, 24, 1 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX9-NEXT: v_add_u32_e32 v5, v5, v0 +; GFX9-NEXT: v_xor_b32_e32 v5, v5, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, 
v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 +; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v5, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v3, v2 +; GFX9-NEXT: v_add_u32_e32 v1, 1, v3 +; GFX9-NEXT: v_sub_u32_e32 v4, v5, v4 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_sub_u32_e32 v3, v4, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v0 +; GFX9-NEXT: v_sub_u32_e32 v0, v1, v0 +; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm ; @@ -2284,10 +2252,10 @@ define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocaptu ; TONGA-NEXT: v_ashrrev_i32_e32 v2, 12, v2 ; TONGA-NEXT: v_lshrrev_b32_e32 v7, 31, v3 ; TONGA-NEXT: v_ashrrev_i32_e32 v3, 12, v3 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v4 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v4, v0 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v1, v5 -; TONGA-NEXT: v_add_u32_e32 v2, vcc, v6, v2 -; TONGA-NEXT: v_add_u32_e32 v3, vcc, v3, v7 +; TONGA-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; TONGA-NEXT: v_add_u32_e32 v3, vcc, v7, v3 ; TONGA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; TONGA-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 4529dc5f1d213..6c54df592da0f 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -59,9 +59,9 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_mul_lo_u32 v4, s5, v0 ; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], s[12:13] ; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_mul_lo_u32 v3, s4, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -111,13 +111,12 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v4 ; GCN-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] -; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 1, v0 +; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GCN-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] -; GCN-NEXT: v_add_i32_e64 v7, s[0:1], 2, v0 +; GCN-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0 ; GCN-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GCN-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GCN-NEXT: v_mov_b32_e32 v6, s3 ; GCN-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s11, v2 @@ -127,9 +126,10 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s11, v2 ; GCN-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: s_xor_b64 s[0:1], s[12:13], s[8:9] -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GCN-NEXT: 
v_xor_b32_e32 v0, s0, v0 ; GCN-NEXT: v_xor_b32_e32 v1, s1, v1 ; GCN-NEXT: v_mov_b32_e32 v2, s1 @@ -142,7 +142,6 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i32 s0, s7, 31 ; GCN-IR-NEXT: s_mov_b32 s1, s0 @@ -154,39 +153,37 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[2:3], s[8:9] ; GCN-IR-NEXT: s_sub_u32 s6, s6, s2 ; GCN-IR-NEXT: s_subb_u32 s7, s7, s2 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[12:13], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[8:9] -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s6 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s7 -; GCN-IR-NEXT: s_min_u32 s14, s8, s9 -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s12 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s13 -; GCN-IR-NEXT: s_min_u32 s18, s8, s9 -; GCN-IR-NEXT: s_sub_u32 s16, s14, s18 -; GCN-IR-NEXT: s_subb_u32 s17, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[16:17], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[16:17], 63 -; GCN-IR-NEXT: s_or_b64 s[20:21], s[10:11], s[20:21] -; GCN-IR-NEXT: s_and_b64 s[10:11], s[20:21], exec -; GCN-IR-NEXT: s_cselect_b32 s11, 0, s13 -; GCN-IR-NEXT: s_cselect_b32 s10, 0, s12 -; GCN-IR-NEXT: s_or_b64 s[20:21], s[20:21], s[22:23] +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[12:13], 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: s_or_b64 s[16:17], s[10:11], s[14:15] +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s6 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s7 +; GCN-IR-NEXT: s_min_u32 s14, s10, s11 +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s12 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s13 +; GCN-IR-NEXT: s_min_u32 s18, s10, s11 +; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 +; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 +; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s20, s16, 1 -; GCN-IR-NEXT: s_addc_u32 s21, s17, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[20:21], 0 -; GCN-IR-NEXT: s_sub_i32 s16, 63, s16 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], s16 +; GCN-IR-NEXT: s_add_u32 s16, s10, 1 +; GCN-IR-NEXT: s_addc_u32 s17, s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[16:17], 0 +; GCN-IR-NEXT: s_sub_i32 s10, 63, s10 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], s10 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[16:17], s[12:13], s20 +; GCN-IR-NEXT: s_lshr_b64 s[16:17], s[12:13], s16 ; GCN-IR-NEXT: s_add_u32 s19, s6, -1 ; GCN-IR-NEXT: s_addc_u32 s20, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[8:9], s[14:15] @@ -217,16 +214,24 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 
s[6:7], s[10:11], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[6:7] -; GCN-IR-NEXT: .LBB0_5: ; %udiv-end +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 +; GCN-IR-NEXT: s_branch .LBB0_6 +; GCN-IR-NEXT: .LBB0_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s13 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[16:17] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s12 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[16:17] +; GCN-IR-NEXT: .LBB0_6: ; %udiv-end ; GCN-IR-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[10:11], s[0:1] -; GCN-IR-NEXT: s_sub_u32 s0, s2, s0 -; GCN-IR-NEXT: s_subb_u32 s1, s3, s1 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 +; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0 +; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s1 +; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s1 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %result = sdiv i64 %x, %y @@ -480,15 +485,15 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_xor_b32 s4, s4, s8 ; GCN-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-NEXT: s_or_b32 s6, s4, 1 +; GCN-NEXT: s_or_b32 s4, s4, 1 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -510,15 +515,15 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_xor_b32 s4, s4, s8 ; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -586,16 +591,16 @@ define amdgpu_kernel void @s_test_sdiv32_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: s_xor_b32 s0, s3, s8 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_ashr_i32 s0, s0, 30 -; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: s_or_b32 s2, s0, 1 +; GCN-NEXT: s_or_b32 s0, s0, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s0 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v1|, |v0| -; GCN-NEXT: 
s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v2 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm @@ -613,16 +618,16 @@ define amdgpu_kernel void @s_test_sdiv32_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: s_xor_b32 s0, s3, s8 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_ashr_i32 s0, s0, 30 -; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: s_or_b32 s2, s0, 1 +; GCN-IR-NEXT: s_or_b32 s0, s0, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s0 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-IR-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v2 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm @@ -650,15 +655,15 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_xor_b32 s4, s4, s8 ; GCN-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-NEXT: s_or_b32 s6, s4, 1 +; GCN-NEXT: s_or_b32 s4, s4, 1 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -680,15 +685,15 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_xor_b32 s4, s4, s8 ; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -717,15 +722,15 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_xor_b32 s4, s4, s8 ; GCN-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-NEXT: s_or_b32 s6, s4, 1 +; GCN-NEXT: s_or_b32 s4, s4, 1 ; GCN-NEXT: v_mul_f32_e32 v2, v1, 
v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 23 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -747,15 +752,15 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_xor_b32 s4, s4, s8 ; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -784,15 +789,15 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_xor_b32 s4, s4, s8 ; GCN-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-NEXT: s_or_b32 s6, s4, 1 +; GCN-NEXT: s_or_b32 s4, s4, 1 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 25 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -814,15 +819,15 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_xor_b32 s4, s4, s8 ; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -848,35 +853,35 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 ; GCN-NEXT: v_cvt_f32_i32_e32 v1, s4 ; GCN-NEXT: s_xor_b32 s4, 
s4, s8 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 +; GCN-NEXT: s_or_b32 s4, s4, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: s_or_b32 s7, s4, 1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s7, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-NEXT: v_cvt_f32_i32_e32 v2, s10 +; GCN-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-NEXT: v_cvt_f32_i32_e32 v3, s6 ; GCN-NEXT: s_xor_b32 s4, s6, s10 -; GCN-NEXT: s_ashr_i32 s4, s4, 30 ; GCN-NEXT: v_rcp_iflag_f32_e32 v4, v2 -; GCN-NEXT: s_or_b32 s6, s4, 1 -; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 -; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: s_ashr_i32 s4, s4, 30 +; GCN-NEXT: s_or_b32 s4, s4, 1 +; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_mul_f32_e32 v4, v3, v4 ; GCN-NEXT: v_trunc_f32_e32 v4, v4 ; GCN-NEXT: v_mad_f32 v3, -v4, v2, v3 ; GCN-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, |v2| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, s4, v4 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_bfe_i32 v2, v2, 0, 24 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GCN-NEXT: s_endpgm @@ -894,35 +899,35 @@ define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s4 ; GCN-IR-NEXT: s_xor_b32 s4, s4, s8 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 -; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: s_or_b32 s7, s4, 1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s7, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s4, v2 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10 +; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6 ; GCN-IR-NEXT: s_xor_b32 s4, s6, s10 -; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v4, v2 -; GCN-IR-NEXT: s_or_b32 s6, s4, 1 -; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: s_ashr_i32 s4, s4, 30 +; GCN-IR-NEXT: s_or_b32 s4, s4, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v5, s4 ; GCN-IR-NEXT: v_mul_f32_e32 v4, v3, v4 ; GCN-IR-NEXT: v_trunc_f32_e32 v4, v4 ; GCN-IR-NEXT: v_mad_f32 v3, -v4, v2, v3 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GCN-IR-NEXT: 
v_cmp_ge_f32_e64 s[4:5], |v3|, |v2| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s4, s6, 0 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s4, v4 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| +; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GCN-IR-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm @@ -961,7 +966,7 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v1| ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -971,7 +976,6 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-LABEL: s_test_sdiv24_48: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_sext_i32_i16 s5, s5 ; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[4:5], 24 @@ -991,39 +995,37 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], s[8:9] ; GCN-IR-NEXT: s_sub_u32 s6, s6, s4 ; GCN-IR-NEXT: s_subb_u32 s7, s7, s4 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[6:7], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[12:13], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s6 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s7 -; GCN-IR-NEXT: s_min_u32 s14, s8, s9 -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s12 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s13 -; GCN-IR-NEXT: s_min_u32 s18, s8, s9 -; GCN-IR-NEXT: s_sub_u32 s16, s14, s18 -; GCN-IR-NEXT: s_subb_u32 s17, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[16:17], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[16:17], 63 -; GCN-IR-NEXT: s_or_b64 s[20:21], s[10:11], s[20:21] -; GCN-IR-NEXT: s_and_b64 s[10:11], s[20:21], exec -; GCN-IR-NEXT: s_cselect_b32 s11, 0, s13 -; GCN-IR-NEXT: s_cselect_b32 s10, 0, s12 -; GCN-IR-NEXT: s_or_b64 s[20:21], s[20:21], s[22:23] +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[12:13], 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: s_or_b64 s[16:17], s[10:11], s[14:15] +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s6 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s7 +; GCN-IR-NEXT: s_min_u32 s14, s10, s11 +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s12 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s13 +; GCN-IR-NEXT: s_min_u32 s18, s10, s11 +; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 +; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 +; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s20, s16, 1 -; GCN-IR-NEXT: s_addc_u32 s21, s17, 0 -; 
GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[20:21], 0 -; GCN-IR-NEXT: s_sub_i32 s16, 63, s16 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], s16 +; GCN-IR-NEXT: s_add_u32 s16, s10, 1 +; GCN-IR-NEXT: s_addc_u32 s17, s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[16:17], 0 +; GCN-IR-NEXT: s_sub_i32 s10, 63, s10 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], s10 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[16:17], s[12:13], s20 +; GCN-IR-NEXT: s_lshr_b64 s[16:17], s[12:13], s16 ; GCN-IR-NEXT: s_add_u32 s19, s6, -1 ; GCN-IR-NEXT: s_addc_u32 s20, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[8:9], s[14:15] @@ -1054,21 +1056,28 @@ define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[10:11], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[6:7] -; GCN-IR-NEXT: .LBB9_5: ; %udiv-end -; GCN-IR-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x9 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 +; GCN-IR-NEXT: s_branch .LBB9_6 +; GCN-IR-NEXT: .LBB9_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s13 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[16:17] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s12 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[16:17] +; GCN-IR-NEXT: .LBB9_6: ; %udiv-end +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 ; GCN-IR-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3] -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[10:11], s[0:1] -; GCN-IR-NEXT: s_sub_u32 s0, s2, s0 -; GCN-IR-NEXT: s_subb_u32 s1, s3, s1 -; GCN-IR-NEXT: s_mov_b32 s15, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s14, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0 +; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s1 +; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 +; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s10, -1 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: buffer_store_short v0, off, s[12:15], 0 offset:4 -; GCN-IR-NEXT: s_waitcnt expcnt(0) -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 -; GCN-IR-NEXT: buffer_store_dword v0, off, s[12:15], 0 +; GCN-IR-NEXT: buffer_store_short v1, off, s[8:11], 0 offset:4 +; GCN-IR-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i48 %x, 24 %2 = ashr i48 %y, 24 @@ -1168,23 +1177,23 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v3 ; GCN-NEXT: v_cndmask_b32_e64 v3, v5, v4, s[0:1] -; GCN-NEXT: v_add_i32_e64 v4, s[0:1], 1, v0 +; GCN-NEXT: v_add_i32_e64 v4, s[0:1], 2, v0 ; GCN-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] -; GCN-NEXT: v_add_i32_e64 v6, s[0:1], 2, v0 +; GCN-NEXT: v_add_i32_e64 v6, s[0:1], 1, v0 ; GCN-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, s[0:1] ; GCN-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v6, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 ; 
GCN-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v6, v4, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GCN-NEXT: v_xor_b32_e32 v0, s8, v0 ; GCN-NEXT: v_xor_b32_e32 v1, s8, v1 ; GCN-NEXT: v_mov_b32_e32 v2, s8 @@ -1196,39 +1205,36 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-LABEL: s_test_sdiv_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i32 s4, s3, 31 ; GCN-IR-NEXT: s_mov_b32 s5, s4 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4 -; GCN-IR-NEXT: s_flbit_i32_b32 s10, s2 -; GCN-IR-NEXT: s_add_i32 s10, s10, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s11, s3 -; GCN-IR-NEXT: s_min_u32 s10, s10, s11 -; GCN-IR-NEXT: s_add_u32 s12, s10, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s13, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 63 -; GCN-IR-NEXT: s_or_b64 s[14:15], s[8:9], s[14:15] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[14:15], exec -; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[14:15] -; GCN-IR-NEXT: s_mov_b32 s9, 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 +; GCN-IR-NEXT: s_min_u32 s10, s8, s9 +; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s14, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s15, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 -; GCN-IR-NEXT: s_sub_i32 s11, 63, s12 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GCN-IR-NEXT: s_lshl_b64 s[8:9], 24, s11 +; GCN-IR-NEXT: s_add_u32 s12, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[12:13], 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[14:15] +; GCN-IR-NEXT: s_lshl_b64 s[8:9], 24, s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[12:13], 24, s14 +; GCN-IR-NEXT: s_lshr_b64 s[12:13], 24, s12 ; GCN-IR-NEXT: s_add_u32 s16, s2, -1 ; GCN-IR-NEXT: s_addc_u32 s17, s3, -1 ; GCN-IR-NEXT: s_sub_u32 s10, 58, s10 @@ -1258,15 +1264,21 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[2:3] -; GCN-IR-NEXT: .LBB10_5: ; %udiv-end -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[8:9], s[4:5] -; GCN-IR-NEXT: s_sub_u32 s4, s6, s4 -; GCN-IR-NEXT: s_subb_u32 s5, s7, s5 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 +; GCN-IR-NEXT: 
s_or_b64 s[2:3], s[6:7], s[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB10_6 +; GCN-IR-NEXT: .LBB10_5: +; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 24, 0, s[12:13] +; GCN-IR-NEXT: .LBB10_6: ; %udiv-end +; GCN-IR-NEXT: v_xor_b32_e32 v0, s4, v0 +; GCN-IR-NEXT: v_xor_b32_e32 v1, s5, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s5 +; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %result = sdiv i64 24, %x @@ -1777,16 +1789,16 @@ define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 % ; GCN-NEXT: s_mov_b32 s4, s0 ; GCN-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-NEXT: v_rcp_iflag_f32_e32 v1, v0 +; GCN-NEXT: s_or_b32 s0, s0, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s0 ; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: s_or_b32 s2, s0, 1 ; GCN-NEXT: v_mul_f32_e32 v1, 0x41c00000, v1 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-NEXT: v_mad_f32 v2, -v1, v0, s3 ; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v1 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1804,16 +1816,16 @@ define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 % ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v1, v0 +; GCN-IR-NEXT: s_or_b32 s0, s0, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s0 ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: s_or_b32 s2, s0, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x41c00000, v1 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-IR-NEXT: v_mad_f32 v2, -v1, v0, s3 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| -; GCN-IR-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-IR-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v1 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1836,17 +1848,17 @@ define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 % ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s2 ; GCN-NEXT: s_mov_b32 s4, s0 ; GCN-NEXT: s_ashr_i32 s0, s2, 30 -; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_or_b32 s0, s0, 1 ; GCN-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-NEXT: v_mad_f32 v0, -v1, s8, v0 ; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-NEXT: s_or_b32 s2, s0, 1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, s8 -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; 
GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm @@ -1862,17 +1874,17 @@ define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 % ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s2 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: s_ashr_i32 s0, s2, 30 -; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: s_or_b32 s0, s0, 1 ; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-IR-NEXT: v_mad_f32 v0, -v1, s8, v0 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-IR-NEXT: s_or_b32 s2, s0, 1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, s8 -; GCN-IR-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-IR-NEXT: s_cselect_b32 s0, s2, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8 +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll index 772f07f3aecd0..4e2f8afba64a1 100644 --- a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll @@ -9,24 +9,23 @@ define amdgpu_kernel void @select_constant_cttz(i32 addrspace(1)* noalias %out, ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dword s2, s[2:3], 0x0 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s4, 1, s2 +; GCN-NEXT: s_lshr_b32 s0, 1, s2 +; GCN-NEXT: s_ff1_i32_b32 s0, s0 ; GCN-NEXT: s_cmp_lg_u32 s2, 0 -; GCN-NEXT: s_ff1_i32_b32 s2, s4 -; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s2, -1, s2 -; GCN-NEXT: s_flbit_i32 s6, s2 -; GCN-NEXT: s_sub_i32 s8, 31, s6 -; GCN-NEXT: s_cmp_eq_u32 s2, 0 -; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0 -; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s4, -1, s8 -; GCN-NEXT: s_mov_b32 s2, -1 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[2:3] +; GCN-NEXT: v_ffbh_i32_e32 v1, v0 +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, 31, v1 +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[0:1] +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm %v = load i32, i32 addrspace(1)* %arrayidx, align 4 %sr = lshr i32 1, %v diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll index 454a45d6365da..07ccf84c70feb 100644 --- a/llvm/test/CodeGen/AMDGPU/select-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll @@ -9,11 +9,10 @@ ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0 ; GCN-DAG: s_cmp_lg_u32 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0 -; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]] -; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec -; GCN: s_cselect_b32 [[RESULT:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] 
-; GCN: buffer_store_dword [[VRESULT]] +; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN-NOT: [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 { %icmp0 = icmp ne i32 %a, %b %icmp1 = icmp ne i32 %a, %c @@ -26,11 +25,10 @@ define amdgpu_kernel void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i3 ; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32: ; GCN-DAG: v_cmp_lg_f32_e32 vcc ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] -; GCN: s_and_b64 [[CMP1]], vcc, [[CMP1]] -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], exec -; GCN: s_cselect_b32 [[RESULT:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] -; GCN: buffer_store_dword [[VRESULT]] +; GCN: s_and_b64 vcc, vcc, [[CMP1]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN-NOT: [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 { %fcmp0 = fcmp one float %a, %b %fcmp1 = fcmp one float %a, %c @@ -45,13 +43,10 @@ define amdgpu_kernel void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, fl ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0 ; GCN-DAG: s_cmp_lg_u32 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0 -; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]] -; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec -; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]] -; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]] -; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]] +; GCN: s_and_b64 vcc, [[CMP1]], [[CMP2]] +; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]] define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 { %icmp0 = icmp ne i32 %a, %b %icmp1 = icmp ne i32 %a, %c @@ -64,13 +59,10 @@ define amdgpu_kernel void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i3 ; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32: ; GCN-DAG: v_cmp_lg_f32_e32 vcc, ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] -; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]] -; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec -; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]] -; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]] -; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]] +; GCN: s_and_b64 vcc, vcc, [[CMP1]] +; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]] define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 { %fcmp0 = fcmp one float %a, %b %fcmp1 = fcmp one float %a, %c @@ -85,11 +77,10 @@ define amdgpu_kernel void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, fl ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0 ; GCN-DAG: s_cmp_lg_u32 ; GCN: 
s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0 -; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]] -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec -; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] -; GCN: buffer_store_dword [[VRESULT]] +; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN-NOT: [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] ; GCN: s_endpgm define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 { %icmp0 = icmp ne i32 %a, %b @@ -103,11 +94,10 @@ define amdgpu_kernel void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32: ; GCN-DAG: v_cmp_lg_f32_e32 vcc ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]] -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec -; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] -; GCN: buffer_store_dword [[VRESULT]] +; GCN: s_or_b64 vcc, vcc, [[CMP1]] +; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN-NOT: [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 { %fcmp0 = fcmp one float %a, %b %fcmp1 = fcmp one float %a, %c @@ -122,13 +112,10 @@ define amdgpu_kernel void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, flo ; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0 ; GCN-DAG: s_cmp_lg_u32 ; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0 -; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]] -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec -; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]] -; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]] -; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]] +; GCN: s_or_b64 vcc, [[CMP1]], [[CMP2]] +; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]] define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 { %icmp0 = icmp ne i32 %a, %b %icmp1 = icmp ne i32 %a, %c @@ -141,13 +128,10 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32: ; GCN-DAG: v_cmp_lg_f32_e32 vcc, ; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]] -; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec -; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]] -; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]] -; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]] -; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]] +; GCN: s_or_b64 vcc, vcc, [[CMP1]] +; GCN: v_cndmask_b32_e32 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: v_cndmask_b32_e32 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, vcc +; GCN: buffer_store_dwordx2 v[[[RESULT0]]:[[RESULT1]]] define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 
%x, i64 %y) #0 { %fcmp0 = fcmp one float %a, %b %fcmp1 = fcmp one float %a, %c diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll index efaf2b2e1c469..aaaa68f571325 100644 --- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll +++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll @@ -69,7 +69,7 @@ define amdgpu_kernel void @v_select_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8 ; GFX89: s_cselect_b32 ; GFX89-NOT: s_cselect_b32 -; SI: s_cselect_b32 +; SI: v_cndmask_b32 ; SI-NOT: cndmask define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 { %cmp = icmp eq i8 %c, 0 @@ -83,7 +83,7 @@ define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, ; GFX89: s_cselect_b32 ; GFX89-NOT: s_cselect_b32 -; SI: s_cselect_b32 +; SI: v_cndmask_b32_e32 ; SI-NOT: v_cndmask_b32e define amdgpu_kernel void @select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 @@ -111,10 +111,10 @@ define amdgpu_kernel void @v_select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16 ; SI: cndmask ; SI-NOT: cndmask -; VI: s_cselect_b32 -; VI: s_cselect_b32 -; GFX9: cndmask -; GFX9: cndmask +; GFX89: v_cndmask_b32_e32 +; GFX89: cndmask +; VI: cndmask +; GFX89-NOT: cndmask define amdgpu_kernel void @v_select_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr, i32 %c) #0 { %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.ptr %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.ptr @@ -156,8 +156,8 @@ define amdgpu_kernel void @v_select_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16 ; vector select with SGPR inputs. ; GCN-LABEL: {{^}}s_select_v2i32: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx2 define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 @@ -167,10 +167,10 @@ define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32 } ; GCN-LABEL: {{^}}s_select_v4i32: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 @@ -198,14 +198,14 @@ bb: } ; GCN-LABEL: {{^}}select_v8i32: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b @@ -214,9 +214,15 @@ define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> } ; GCN-LABEL: {{^}}s_select_v2f32: +; GCN-DAG: s_load_dwordx4 s[[[ALO:[0-9]+]]:[[BHI:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} + +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]] +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]] ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}} -; GCN-DAG: s_cselect_b32 -; GCN-DAG: s_cselect_b32 + 
+; GCN-DAG: v_cndmask_b32_e32 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; GCN-DAG: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx2 define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 @@ -228,9 +234,9 @@ define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x f ; GCN-LABEL: {{^}}s_select_v3f32: ; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}} -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx define amdgpu_kernel void @s_select_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b, i32 %c) #0 { @@ -244,10 +250,10 @@ define amdgpu_kernel void @s_select_v3f32(<3 x float> addrspace(1)* %out, <3 x f ; GCN: s_load_dwordx8 ; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}} -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) #0 { @@ -278,11 +284,11 @@ bb: ; GCN-LABEL: {{^}}s_select_v5f32: ; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}} -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 ; GCN: buffer_store_dwordx define amdgpu_kernel void @s_select_v5f32(<5 x float> addrspace(1)* %out, <5 x float> %a, <5 x float> %b, i32 %c) #0 { @@ -309,10 +315,10 @@ define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x flo } ; GCN-LABEL: {{^}}select_v2f64: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <2 x double> %a, <2 x double> %b @@ -321,14 +327,14 @@ define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x do } ; GCN-LABEL: {{^}}select_v4f64: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x double> %a, <4 x double> %b @@ -337,22 +343,22 @@ define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x do } ; GCN-LABEL: {{^}}select_v8f64: -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: 
v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x double> %a, <8 x double> %b diff --git a/llvm/test/CodeGen/AMDGPU/select64.ll b/llvm/test/CodeGen/AMDGPU/select64.ll index 0224708fb586c..61ce9c12526d4 100644 --- a/llvm/test/CodeGen/AMDGPU/select64.ll +++ b/llvm/test/CodeGen/AMDGPU/select64.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=SI,GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=VI,GCN %s ; GCN-LABEL: {{^}}select0: ; i64 select should be split into two i32 selects, and we shouldn't need ; to use a shfit to extract the hi dword of the input. ; GCN-NOT: s_lshr_b64 -; GCN: s_cselect_b32 -; GCN: s_cselect_b32 +; GCN: v_cndmask +; GCN: v_cndmask define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) { entry: %0 = icmp ugt i32 %cond, 5 @@ -16,8 +16,10 @@ entry: } ; GCN-LABEL: {{^}}select_trunc_i64: -; GCN: s_cselect_b32 -; GCN-NOT: s_cselect_b32 +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind { %cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 0, i64 %in @@ -27,8 +29,10 @@ define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i } ; GCN-LABEL: {{^}}select_trunc_i64_2: -; GCN: s_cselect_b32 -; GCN-NOT: s_cselect_b32 +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind { %cmp = icmp ugt i32 %cond, 5 %sel = select i1 %cmp, i64 %a, i64 %b @@ -38,8 +42,10 @@ define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, } ; GCN-LABEL: {{^}}v_select_trunc_i64_2: -; GCN: s_cselect_b32 -; GCN-NOT: s_cselect_b32 +; VI: s_cselect_b32 +; VI-NOT: s_cselect_b32 +; SI: v_cndmask_b32 +; SI-NOT: v_cndmask_b32 define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5 %a = load i64, i64 addrspace(1)* %aptr, align 8 @@ -51,8 +57,8 @@ define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %con } ; GCN-LABEL: {{^}}v_select_i64_split_imm: -; GCN-DAG: s_cselect_b32 -; GCN-DAG: s_cselect_b32 +; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +; GCN-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} ; GCN: s_endpgm define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5 diff --git a/llvm/test/CodeGen/AMDGPU/selectcc.ll b/llvm/test/CodeGen/AMDGPU/selectcc.ll index b743ba3ba8879..54a26a4cf676a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/selectcc.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc.ll @@ -10,7 +10,8 @@ ; EG: CNDE_INT ; SI: v_cmp_eq_u64 ; VI: s_cmp_eq_u64 -; GCN: s_cselect_b32 +; GCN: v_cndmask +; GCN: v_cndmask define amdgpu_kernel void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) { entry: %0 = icmp eq i64 %lhs, %rhs diff --git a/llvm/test/CodeGen/AMDGPU/setcc64.ll b/llvm/test/CodeGen/AMDGPU/setcc64.ll index 5b7099ff7ed2d..718cf7015a4a3 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc64.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc64.ll @@ -261,8 +261,8 @@ entry: } ; GCN-LABEL: {{^}}i128_sle: +; GCN: v_cmp_le_u64 ; GCN: v_cmp_le_i64 -; CGV: v_cndmask ; SI: v_cmp_eq_u64 ; VI: s_cmp_eq_u64 define amdgpu_kernel void @i128_sle(i32 addrspace(1)* %out, i128 %a, i128 %b) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll index 61d4c7d271e3a..32a2242d5e937 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll @@ -22,13 +22,14 @@ ; DEFAULT-NEXT: Live Register Matrix ; DEFAULT-NEXT: Greedy Register Allocator ; DEFAULT-NEXT: GCN NSA Reassign +; DEFAULT-NEXT: SI Simplify Predicated Copies ; DEFAULT-NEXT: Virtual Register Rewriter ; DEFAULT-NEXT: Stack Slot Coloring ; O0: Fast Register Allocator ; O0-NEXT: SI lower SGPR spill instructions ; O0-NEXT: Fast Register Allocator -; O0-NEXT: SI Fix VGPR copies +; O0-NEXT: SI Simplify Predicated Copies @@ -50,6 +51,7 @@ ; BASIC-DEFAULT-NEXT: Machine Optimization Remark Emitter ; BASIC-DEFAULT-NEXT: Greedy Register Allocator ; BASIC-DEFAULT-NEXT: GCN NSA Reassign +; BASIC-DEFAULT-NEXT: SI Simplify Predicated Copies ; BASIC-DEFAULT-NEXT: Virtual Register Rewriter ; BASIC-DEFAULT-NEXT: Stack Slot Coloring @@ -62,6 +64,7 @@ ; DEFAULT-BASIC-NEXT: Live Register Matrix ; DEFAULT-BASIC-NEXT: Basic Register Allocator ; DEFAULT-BASIC-NEXT: GCN NSA Reassign +; DEFAULT-BASIC-NEXT: SI Simplify Predicated Copies ; DEFAULT-BASIC-NEXT: Virtual Register Rewriter ; DEFAULT-BASIC-NEXT: Stack Slot Coloring @@ -80,6 +83,7 @@ ; BASIC-BASIC-NEXT: Live Register Matrix ; BASIC-BASIC-NEXT: Basic Register Allocator ; BASIC-BASIC-NEXT: GCN NSA Reassign +; BASIC-BASIC-NEXT: SI Simplify Predicated Copies ; BASIC-BASIC-NEXT: Virtual Register Rewriter ; BASIC-BASIC-NEXT: Stack Slot Coloring diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir index 0cd03c4ccffb7..7e4619cc04b31 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -start-before=si-lower-sgpr-spills -stop-after=prologepilog -o - %s | FileCheck %s # After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0. # Otherwise, the test would crash during PEI while trying to replace the dead frame index. 
@@ -41,11 +42,20 @@ machineFunctionInfo: body: | ; CHECK-LABEL: name: test ; CHECK: bb.0: - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, $vgpr0 - ; CHECK: DBG_DEF <{{.*}}>, $noreg - ; CHECK: bb.1: - ; CHECK: $sgpr10 = V_READLANE_B32 $vgpr0, 0 - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr10 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, killed $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 0 + ; CHECK-NEXT: KILL killed renamable $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 bb.0: renamable $sgpr10 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir index 687adc69bd148..f40c4977bba6e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir @@ -1,4 +1,6 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck -check-prefix=PEI %s # After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0. # Otherwise, the test would crash during PEI while trying to replace the dead frame index. 
@@ -39,13 +41,21 @@ machineFunctionInfo: workGroupIDX: { reg: '$sgpr8' } privateSegmentWaveByteOffset: { reg: '$sgpr9' } body: | - ; CHECK-LABEL: name: test - ; CHECK: bb.0: - ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, $vgpr0 - ; CHECK: DBG_VALUE $noreg, 0 - ; CHECK: bb.1: - ; CHECK: $sgpr10 = V_READLANE_B32 $vgpr0, 0 - ; CHECK: S_ENDPGM 0 + ; SGPR_SPILL-LABEL: name: test + ; SGPR_SPILL: bb.0: + ; SGPR_SPILL: [[VGPR:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; SGPR_SPILL: [[VGPR]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[VGPR]] + ; SGPR_SPILL: DBG_VALUE $noreg, 0 + ; SGPR_SPILL: bb.1: + ; SGPR_SPILL: $sgpr10 = V_READLANE_B32 [[VGPR]], 0 + ; SGPR_SPILL: S_ENDPGM 0 + ; PEI-LABEL: name: test + ; PEI: bb.0: + ; PEI: renamable $[[VGPR:vgpr[0-9]+]] = IMPLICIT_DEF + ; PEI: renamable $[[VGPR]] = V_WRITELANE_B32 killed $sgpr10, 0, killed $[[VGPR]] + ; PEI: bb.1: + ; PEI: $sgpr10 = V_READLANE_B32 $[[VGPR]], 0 + ; PEI: S_ENDPGM 0 bb.0: renamable $sgpr10 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-def-heterogeneous-dwarf.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-def-heterogeneous-dwarf.mir index 8c37776ce1a4b..99d7224daf182 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-def-heterogeneous-dwarf.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-def-heterogeneous-dwarf.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s # After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, we replace the dead frame index in the DBG_DEF instruction with reg 0. # Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. @@ -45,7 +45,7 @@ machineFunctionInfo: body: | ; CHECK-LABEL: name: test ; CHECK: bb.0: - ; CHECK: DBG_DEF {{.*}}, 0 + ; CHECK: DBG_DEF bb.0: renamable $sgpr10 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir index 4694810379fe0..a6cb7d4af7641 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s # After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, we replace the dead frame index in the DBG_VALUE instruction with reg 0. # Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. 
@@ -45,7 +45,7 @@ machineFunctionInfo: body: | ; CHECK-LABEL: name: test ; CHECK: bb.0: - ; CHECK: DBG_VALUE $noreg, 0 + ; CHECK: DBG_VALUE bb.0: renamable $sgpr10 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll index c17b5dda959ae..c26ed7082938b 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll @@ -1,17 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; The first 64 SGPR spills can go to a VGPR, but there isn't a second -; so some spills must be to memory. The last 16 element spill runs out of lanes at the 15th element. +; This test was originally written when SGPRs are spilled directly to physical VGPRs and +; stressed a case when there wasn't enough VGPRs to accommodate all spills. +; When we started spilling them into virtual VGPR lanes, we always succeed in doing so. +; The regalloc pass later takes care of allocating VGPRs to these virtual registers. define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: partial_no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: ; GCN-NEXT: s_add_u32 s0, s0, s7 ; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -23,179 +33,179 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %o ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v23, s8, 0 -; GCN-NEXT: v_writelane_b32 v23, s9, 1 -; GCN-NEXT: v_writelane_b32 v23, s10, 2 -; GCN-NEXT: v_writelane_b32 v23, s11, 3 -; GCN-NEXT: v_writelane_b32 v23, s12, 4 -; GCN-NEXT: v_writelane_b32 v23, s13, 5 -; GCN-NEXT: v_writelane_b32 v23, s14, 6 -; GCN-NEXT: v_writelane_b32 v23, s15, 7 -; GCN-NEXT: v_writelane_b32 v23, s16, 8 -; GCN-NEXT: v_writelane_b32 v23, s17, 9 -; GCN-NEXT: v_writelane_b32 v23, s18, 10 -; GCN-NEXT: v_writelane_b32 v23, s19, 11 -; GCN-NEXT: v_writelane_b32 v23, s20, 12 -; GCN-NEXT: v_writelane_b32 v23, s21, 13 -; GCN-NEXT: v_writelane_b32 v23, s22, 14 -; GCN-NEXT: v_writelane_b32 v23, s23, 15 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_writelane_b32 v1, s8, 0 +; GCN-NEXT: v_writelane_b32 v1, s9, 1 +; GCN-NEXT: v_writelane_b32 v1, s10, 2 +; GCN-NEXT: v_writelane_b32 v1, s11, 3 +; GCN-NEXT: v_writelane_b32 v1, s12, 4 +; GCN-NEXT: v_writelane_b32 v1, s13, 5 +; GCN-NEXT: v_writelane_b32 v1, s14, 6 +; GCN-NEXT: v_writelane_b32 v1, s15, 7 +; GCN-NEXT: v_writelane_b32 v1, s16, 8 +; GCN-NEXT: v_writelane_b32 v1, s17, 9 +; GCN-NEXT: v_writelane_b32 v1, s18, 10 +; GCN-NEXT: v_writelane_b32 v1, s19, 11 +; GCN-NEXT: v_writelane_b32 v1, s20, 12 +; GCN-NEXT: v_writelane_b32 v1, s21, 13 +; GCN-NEXT: v_writelane_b32 v1, s22, 14 +; GCN-NEXT: 
v_writelane_b32 v1, s23, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v23, s8, 16 -; GCN-NEXT: v_writelane_b32 v23, s9, 17 -; GCN-NEXT: v_writelane_b32 v23, s10, 18 -; GCN-NEXT: v_writelane_b32 v23, s11, 19 -; GCN-NEXT: v_writelane_b32 v23, s12, 20 -; GCN-NEXT: v_writelane_b32 v23, s13, 21 -; GCN-NEXT: v_writelane_b32 v23, s14, 22 -; GCN-NEXT: v_writelane_b32 v23, s15, 23 -; GCN-NEXT: v_writelane_b32 v23, s16, 24 -; GCN-NEXT: v_writelane_b32 v23, s17, 25 -; GCN-NEXT: v_writelane_b32 v23, s18, 26 -; GCN-NEXT: v_writelane_b32 v23, s19, 27 -; GCN-NEXT: v_writelane_b32 v23, s20, 28 -; GCN-NEXT: v_writelane_b32 v23, s21, 29 -; GCN-NEXT: v_writelane_b32 v23, s22, 30 -; GCN-NEXT: v_writelane_b32 v23, s23, 31 +; GCN-NEXT: v_writelane_b32 v1, s8, 16 +; GCN-NEXT: v_writelane_b32 v1, s9, 17 +; GCN-NEXT: v_writelane_b32 v1, s10, 18 +; GCN-NEXT: v_writelane_b32 v1, s11, 19 +; GCN-NEXT: v_writelane_b32 v1, s12, 20 +; GCN-NEXT: v_writelane_b32 v1, s13, 21 +; GCN-NEXT: v_writelane_b32 v1, s14, 22 +; GCN-NEXT: v_writelane_b32 v1, s15, 23 +; GCN-NEXT: v_writelane_b32 v1, s16, 24 +; GCN-NEXT: v_writelane_b32 v1, s17, 25 +; GCN-NEXT: v_writelane_b32 v1, s18, 26 +; GCN-NEXT: v_writelane_b32 v1, s19, 27 +; GCN-NEXT: v_writelane_b32 v1, s20, 28 +; GCN-NEXT: v_writelane_b32 v1, s21, 29 +; GCN-NEXT: v_writelane_b32 v1, s22, 30 +; GCN-NEXT: v_writelane_b32 v1, s23, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v23, s8, 32 -; GCN-NEXT: v_writelane_b32 v23, s9, 33 -; GCN-NEXT: v_writelane_b32 v23, s10, 34 -; GCN-NEXT: v_writelane_b32 v23, s11, 35 -; GCN-NEXT: v_writelane_b32 v23, s12, 36 -; GCN-NEXT: v_writelane_b32 v23, s13, 37 -; GCN-NEXT: v_writelane_b32 v23, s14, 38 -; GCN-NEXT: v_writelane_b32 v23, s15, 39 -; GCN-NEXT: v_writelane_b32 v23, s16, 40 -; GCN-NEXT: v_writelane_b32 v23, s17, 41 -; GCN-NEXT: v_writelane_b32 v23, s18, 42 -; GCN-NEXT: v_writelane_b32 v23, s19, 43 -; GCN-NEXT: v_writelane_b32 v23, s20, 44 -; GCN-NEXT: v_writelane_b32 v23, s21, 45 -; GCN-NEXT: v_writelane_b32 v23, s22, 46 -; GCN-NEXT: v_writelane_b32 v23, s23, 47 +; GCN-NEXT: v_writelane_b32 v1, s8, 32 +; GCN-NEXT: v_writelane_b32 v1, s9, 33 +; GCN-NEXT: v_writelane_b32 v1, s10, 34 +; GCN-NEXT: v_writelane_b32 v1, s11, 35 +; GCN-NEXT: v_writelane_b32 v1, s12, 36 +; GCN-NEXT: v_writelane_b32 v1, s13, 37 +; GCN-NEXT: v_writelane_b32 v1, s14, 38 +; GCN-NEXT: v_writelane_b32 v1, s15, 39 +; GCN-NEXT: v_writelane_b32 v1, s16, 40 +; GCN-NEXT: v_writelane_b32 v1, s17, 41 +; GCN-NEXT: v_writelane_b32 v1, s18, 42 +; GCN-NEXT: v_writelane_b32 v1, s19, 43 +; GCN-NEXT: v_writelane_b32 v1, s20, 44 +; GCN-NEXT: v_writelane_b32 v1, s21, 45 +; GCN-NEXT: v_writelane_b32 v1, s22, 46 +; GCN-NEXT: v_writelane_b32 v1, s23, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v23, s8, 48 -; GCN-NEXT: v_writelane_b32 v23, s9, 49 -; GCN-NEXT: v_writelane_b32 v23, s10, 50 -; GCN-NEXT: v_writelane_b32 v23, s11, 51 -; GCN-NEXT: v_writelane_b32 v23, s12, 52 -; GCN-NEXT: v_writelane_b32 v23, s13, 53 -; GCN-NEXT: v_writelane_b32 v23, s14, 54 -; GCN-NEXT: v_writelane_b32 v23, s15, 55 -; GCN-NEXT: v_writelane_b32 v23, s16, 56 -; GCN-NEXT: v_writelane_b32 v23, s17, 57 -; GCN-NEXT: v_writelane_b32 v23, s18, 58 -; GCN-NEXT: v_writelane_b32 v23, s19, 59 -; GCN-NEXT: v_writelane_b32 v23, s20, 60 -; GCN-NEXT: v_writelane_b32 v23, s21, 61 -; GCN-NEXT: v_writelane_b32 v23, s22, 62 -; GCN-NEXT: v_writelane_b32 v23, 
s23, 63 +; GCN-NEXT: v_writelane_b32 v1, s8, 48 +; GCN-NEXT: v_writelane_b32 v1, s9, 49 +; GCN-NEXT: v_writelane_b32 v1, s10, 50 +; GCN-NEXT: v_writelane_b32 v1, s11, 51 +; GCN-NEXT: v_writelane_b32 v1, s12, 52 +; GCN-NEXT: v_writelane_b32 v1, s13, 53 +; GCN-NEXT: v_writelane_b32 v1, s14, 54 +; GCN-NEXT: v_writelane_b32 v1, s15, 55 +; GCN-NEXT: v_writelane_b32 v1, s16, 56 +; GCN-NEXT: v_writelane_b32 v1, s17, 57 +; GCN-NEXT: v_writelane_b32 v1, s18, 58 +; GCN-NEXT: v_writelane_b32 v1, s19, 59 +; GCN-NEXT: v_writelane_b32 v1, s20, 60 +; GCN-NEXT: v_writelane_b32 v1, s21, 61 +; GCN-NEXT: v_writelane_b32 v1, s22, 62 +; GCN-NEXT: v_writelane_b32 v1, s23, 63 +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[6:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[8:9], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_writelane_b32 v0, s6, 0 ; GCN-NEXT: v_writelane_b32 v0, s7, 1 +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s4, s5 ; GCN-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s4, v23, 0 -; GCN-NEXT: v_readlane_b32 s5, v23, 1 -; GCN-NEXT: v_readlane_b32 s6, v23, 2 -; GCN-NEXT: v_readlane_b32 s7, v23, 3 -; GCN-NEXT: v_readlane_b32 s8, v23, 4 -; GCN-NEXT: v_readlane_b32 s9, v23, 5 -; GCN-NEXT: v_readlane_b32 s10, v23, 6 -; GCN-NEXT: v_readlane_b32 s11, v23, 7 -; GCN-NEXT: v_readlane_b32 s12, v23, 8 -; GCN-NEXT: v_readlane_b32 s13, v23, 9 -; GCN-NEXT: v_readlane_b32 s14, v23, 10 -; GCN-NEXT: v_readlane_b32 s15, v23, 11 -; GCN-NEXT: v_readlane_b32 s16, v23, 12 -; GCN-NEXT: v_readlane_b32 s17, v23, 13 -; GCN-NEXT: v_readlane_b32 s18, v23, 14 -; GCN-NEXT: v_readlane_b32 s19, v23, 15 +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s4, v1, 0 +; GCN-NEXT: v_readlane_b32 s5, v1, 1 +; GCN-NEXT: v_readlane_b32 s6, v1, 2 +; GCN-NEXT: v_readlane_b32 s7, v1, 3 +; GCN-NEXT: v_readlane_b32 s8, v1, 4 +; GCN-NEXT: v_readlane_b32 s9, v1, 5 +; GCN-NEXT: v_readlane_b32 s10, v1, 6 +; GCN-NEXT: v_readlane_b32 s11, v1, 7 +; GCN-NEXT: v_readlane_b32 s12, v1, 8 +; GCN-NEXT: v_readlane_b32 s13, v1, 9 +; GCN-NEXT: v_readlane_b32 s14, v1, 10 +; GCN-NEXT: v_readlane_b32 s15, v1, 11 +; GCN-NEXT: v_readlane_b32 s16, v1, 12 +; GCN-NEXT: v_readlane_b32 s17, v1, 13 +; GCN-NEXT: v_readlane_b32 s18, v1, 14 +; GCN-NEXT: v_readlane_b32 s19, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v23, 16 -; GCN-NEXT: v_readlane_b32 s5, v23, 17 -; GCN-NEXT: v_readlane_b32 s6, v23, 18 -; GCN-NEXT: v_readlane_b32 s7, v23, 19 -; GCN-NEXT: v_readlane_b32 s8, v23, 20 -; GCN-NEXT: v_readlane_b32 s9, v23, 21 -; GCN-NEXT: v_readlane_b32 s10, v23, 22 -; GCN-NEXT: 
v_readlane_b32 s11, v23, 23 -; GCN-NEXT: v_readlane_b32 s12, v23, 24 -; GCN-NEXT: v_readlane_b32 s13, v23, 25 -; GCN-NEXT: v_readlane_b32 s14, v23, 26 -; GCN-NEXT: v_readlane_b32 s15, v23, 27 -; GCN-NEXT: v_readlane_b32 s16, v23, 28 -; GCN-NEXT: v_readlane_b32 s17, v23, 29 -; GCN-NEXT: v_readlane_b32 s18, v23, 30 -; GCN-NEXT: v_readlane_b32 s19, v23, 31 +; GCN-NEXT: v_readlane_b32 s4, v1, 16 +; GCN-NEXT: v_readlane_b32 s5, v1, 17 +; GCN-NEXT: v_readlane_b32 s6, v1, 18 +; GCN-NEXT: v_readlane_b32 s7, v1, 19 +; GCN-NEXT: v_readlane_b32 s8, v1, 20 +; GCN-NEXT: v_readlane_b32 s9, v1, 21 +; GCN-NEXT: v_readlane_b32 s10, v1, 22 +; GCN-NEXT: v_readlane_b32 s11, v1, 23 +; GCN-NEXT: v_readlane_b32 s12, v1, 24 +; GCN-NEXT: v_readlane_b32 s13, v1, 25 +; GCN-NEXT: v_readlane_b32 s14, v1, 26 +; GCN-NEXT: v_readlane_b32 s15, v1, 27 +; GCN-NEXT: v_readlane_b32 s16, v1, 28 +; GCN-NEXT: v_readlane_b32 s17, v1, 29 +; GCN-NEXT: v_readlane_b32 s18, v1, 30 +; GCN-NEXT: v_readlane_b32 s19, v1, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v23, 32 -; GCN-NEXT: v_readlane_b32 s5, v23, 33 -; GCN-NEXT: v_readlane_b32 s6, v23, 34 -; GCN-NEXT: v_readlane_b32 s7, v23, 35 -; GCN-NEXT: v_readlane_b32 s8, v23, 36 -; GCN-NEXT: v_readlane_b32 s9, v23, 37 -; GCN-NEXT: v_readlane_b32 s10, v23, 38 -; GCN-NEXT: v_readlane_b32 s11, v23, 39 -; GCN-NEXT: v_readlane_b32 s12, v23, 40 -; GCN-NEXT: v_readlane_b32 s13, v23, 41 -; GCN-NEXT: v_readlane_b32 s14, v23, 42 -; GCN-NEXT: v_readlane_b32 s15, v23, 43 -; GCN-NEXT: v_readlane_b32 s16, v23, 44 -; GCN-NEXT: v_readlane_b32 s17, v23, 45 -; GCN-NEXT: v_readlane_b32 s18, v23, 46 -; GCN-NEXT: v_readlane_b32 s19, v23, 47 +; GCN-NEXT: v_readlane_b32 s4, v1, 32 +; GCN-NEXT: v_readlane_b32 s5, v1, 33 +; GCN-NEXT: v_readlane_b32 s6, v1, 34 +; GCN-NEXT: v_readlane_b32 s7, v1, 35 +; GCN-NEXT: v_readlane_b32 s8, v1, 36 +; GCN-NEXT: v_readlane_b32 s9, v1, 37 +; GCN-NEXT: v_readlane_b32 s10, v1, 38 +; GCN-NEXT: v_readlane_b32 s11, v1, 39 +; GCN-NEXT: v_readlane_b32 s12, v1, 40 +; GCN-NEXT: v_readlane_b32 s13, v1, 41 +; GCN-NEXT: v_readlane_b32 s14, v1, 42 +; GCN-NEXT: v_readlane_b32 s15, v1, 43 +; GCN-NEXT: v_readlane_b32 s16, v1, 44 +; GCN-NEXT: v_readlane_b32 s17, v1, 45 +; GCN-NEXT: v_readlane_b32 s18, v1, 46 +; GCN-NEXT: v_readlane_b32 s19, v1, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s8, v23, 48 -; GCN-NEXT: v_readlane_b32 s9, v23, 49 -; GCN-NEXT: v_readlane_b32 s10, v23, 50 -; GCN-NEXT: v_readlane_b32 s11, v23, 51 -; GCN-NEXT: v_readlane_b32 s12, v23, 52 -; GCN-NEXT: v_readlane_b32 s13, v23, 53 -; GCN-NEXT: v_readlane_b32 s14, v23, 54 -; GCN-NEXT: v_readlane_b32 s15, v23, 55 -; GCN-NEXT: v_readlane_b32 s16, v23, 56 -; GCN-NEXT: v_readlane_b32 s17, v23, 57 -; GCN-NEXT: v_readlane_b32 s18, v23, 58 -; GCN-NEXT: v_readlane_b32 s19, v23, 59 -; GCN-NEXT: v_readlane_b32 s20, v23, 60 -; GCN-NEXT: v_readlane_b32 s21, v23, 61 -; GCN-NEXT: v_readlane_b32 s22, v23, 62 -; GCN-NEXT: v_readlane_b32 s23, v23, 63 -; GCN-NEXT: s_mov_b64 s[6:7], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s8, v1, 48 +; GCN-NEXT: v_readlane_b32 s9, v1, 49 +; GCN-NEXT: v_readlane_b32 s10, v1, 50 +; GCN-NEXT: v_readlane_b32 s11, v1, 51 +; GCN-NEXT: v_readlane_b32 s12, v1, 52 +; GCN-NEXT: v_readlane_b32 s13, v1, 53 +; GCN-NEXT: 
v_readlane_b32 s14, v1, 54 +; GCN-NEXT: v_readlane_b32 s15, v1, 55 +; GCN-NEXT: v_readlane_b32 s16, v1, 56 +; GCN-NEXT: v_readlane_b32 s17, v1, 57 +; GCN-NEXT: v_readlane_b32 s18, v1, 58 +; GCN-NEXT: v_readlane_b32 s19, v1, 59 +; GCN-NEXT: v_readlane_b32 s20, v1, 60 +; GCN-NEXT: v_readlane_b32 s21, v1, 61 +; GCN-NEXT: v_readlane_b32 s22, v1, 62 +; GCN-NEXT: v_readlane_b32 s23, v1, 63 ; GCN-NEXT: v_readlane_b32 s4, v0, 0 ; GCN-NEXT: v_readlane_b32 s5, v0, 1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[8:23] ; GCN-NEXT: ;;#ASMEND @@ -203,6 +213,14 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(i32 addrspace(1)* %o ; GCN-NEXT: ; use s[4:5] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB0_2: ; %ret +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] +; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[24:25] +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir index 9596d3b7f6359..26a5eedc3eca3 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir @@ -20,10 +20,11 @@ body: | liveins: $sgpr4 ; CHECK-LABEL: name: sgpr_spill_s64_undef_high32 - ; CHECK: liveins: $sgpr4, $vgpr0 + ; CHECK: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... @@ -45,10 +46,11 @@ body: | liveins: $sgpr5 ; CHECK-LABEL: name: sgpr_spill_s64_undef_low32 - ; CHECK: liveins: $sgpr5, $vgpr0 + ; CHECK: liveins: $sgpr5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... 
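The regenerated checks in the tests above all follow from one underlying change: SILowerSGPRSpills now writes SGPR spill lanes into a virtual VGPR and leaves the choice of physical VGPR (and any scratch spill of it, as seen in sgpr-spill-no-vgprs.ll) to the register allocator and PEI. A minimal before/after sketch of the spill shape these checks look for, assuming a single 32-bit spill of $sgpr10 (illustrative MIR only, not taken verbatim from any one test):

# Before: the spill lane lived directly in a reserved physical VGPR.
#   $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, $vgpr0
#   ...
#   $sgpr10 = V_READLANE_B32 $vgpr0, 0
#
# After si-lower-sgpr-spills: the lane lives in a virtual VGPR; the register
# allocator later assigns a physical VGPR, or spills it to scratch memory when
# none is free.
#   %0:vgpr_32 = IMPLICIT_DEF
#   %0:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, %0
#   ...
#   $sgpr10 = V_READLANE_B32 %0, 0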
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir index c35ca3203b265..09cf212ea5cca 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -229,7 +229,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 + ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -312,7 +312,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -400,7 +400,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr64_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 + ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -484,7 +484,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr64_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -576,7 +576,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot_x2 ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 + ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -670,7 +670,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot_x2 ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll index b20f540cf2472..e7148e2eb057c 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll @@ -13,24 +13,19 @@ define amdgpu_kernel void @kernel() { ; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GCN-NEXT: s_mov_b32 s38, -1 ; GCN-NEXT: s_mov_b32 s39, 0xe00000 -; GCN-NEXT: v_writelane_b32 v40, s4, 0 -; GCN-NEXT: s_add_u32 s36, s36, s11 -; GCN-NEXT: v_writelane_b32 v40, s5, 1 +; GCN-NEXT: s_add_u32 s36, s36, s9 ; GCN-NEXT: s_addc_u32 s37, s37, 0 +; GCN-NEXT: s_mov_b32 s14, s8 +; GCN-NEXT: s_add_u32 s8, s2, 36 +; GCN-NEXT: s_addc_u32 s9, s3, 0 +; GCN-NEXT: s_mov_b64 s[10:11], s[4:5] ; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] -; GCN-NEXT: v_readlane_b32 s0, v40, 0 -; GCN-NEXT: s_mov_b32 s13, s9 -; GCN-NEXT: s_mov_b32 s12, s8 -; GCN-NEXT: v_readlane_b32 s1, v40, 1 -; GCN-NEXT: s_add_u32 s8, s0, 36 -; GCN-NEXT: s_addc_u32 s9, s1, 0 ; GCN-NEXT: s_getpc_b64 s[0:1] ; GCN-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GCN-NEXT: s_mov_b32 s14, s10 -; 
GCN-NEXT: s_mov_b64 s[10:11], s[6:7] -; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] +; GCN-NEXT: s_mov_b32 s13, s7 +; GCN-NEXT: s_mov_b32 s12, s6 +; GCN-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -38,7 +33,7 @@ define amdgpu_kernel void @kernel() { ; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_endpgm call void @foo() ret void diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir index f9ae303194cbe..589c7a52598c5 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -start-before=si-lower-sgpr-spills -stop-after=prologepilog -o - %s | FileCheck %s # Check that we allocate 2 emergency stack slots if we're spilling # SGPRs to memory and potentially have an offset larger than fits in @@ -21,7 +21,7 @@ # CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) -# CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr1 +# CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr10, 0, undef $vgpr1 # CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) # CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index 02d75d8081158..0c0ea026cd31b 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -6,14 +6,14 @@ define void @child_function() #0 { } ; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available: -; GCN: buffer_store_dword v255, off, s[0:3], s32 -; GCN: v_writelane_b32 v255, s33, 2 +; GCN: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33 +; GCN: buffer_store_dword v255, off, s[0:3], s33 ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN: v_readlane_b32 s30, v255, 0 ; GCN: v_readlane_b32 s31, v255, 1 -; GCN: v_readlane_b32 s33, v255, 2 +; GCN: s_mov_b32 s33, [[TMP_SGPR]] ; GCN: ; NumVgprs: 256 define void @spill_sgpr_with_no_lower_vgpr_available() #0 { @@ -52,14 +52,14 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { } ; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr: -; GCN: buffer_store_dword v254, off, s[0:3], s32 -; GCN: v_writelane_b32 v254, s33, 2 +; GCN: s_mov_b32 [[TMP_SGPR:s[0-9]+]], s33 +; GCN: buffer_store_dword v254, off, s[0:3], s33 ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN: v_readlane_b32 s30, v254, 0 ; GCN: v_readlane_b32 s31, v254, 1 -; GCN: v_readlane_b32 s33, v254, 2 +; GCN: s_mov_b32 s33, [[TMP_SGPR]] define void 
@spill_to_lowest_available_vgpr() #0 { %alloca = alloca i32, align 4, addrspace(5) @@ -99,8 +99,8 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses: ; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 ; GCN: ; def s4 -; GCN: v_writelane_b32 v254, s4, 0 -; GCN: v_readlane_b32 s4, v254, 0 +; GCN: v_writelane_b32 v0, s4, 0 +; GCN: v_readlane_b32 s4, v0, 0 ; GCN: ; use s4 define void @spill_sgpr_with_sgpr_uses() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll index 403e553f8bec9..ed0fc095ba5d4 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll @@ -183,29 +183,35 @@ define i128 @v_ashr_i128_kv(i128 %rhs) { define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) { ; GCN-LABEL: s_shl_i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s5, s4, 64 -; GCN-NEXT: s_sub_i32 s12, 64, s4 -; GCN-NEXT: s_lshl_b64 s[6:7], s[2:3], s4 -; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s4 -; GCN-NEXT: s_lshl_b64 s[10:11], s[0:1], s5 -; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], s12 -; GCN-NEXT: s_or_b64 s[0:1], s[6:7], s[0:1] -; GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s0, s0, s10 -; GCN-NEXT: s_cselect_b32 s1, s1, s11 -; GCN-NEXT: s_cselect_b32 s5, s9, 0 -; GCN-NEXT: s_cselect_b32 s6, s8, 0 -; GCN-NEXT: s_cmp_eq_u32 s4, 0 -; GCN-NEXT: s_cselect_b32 s1, s3, s1 -; GCN-NEXT: s_cselect_b32 s0, s2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s6 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: s_sub_i32 s9, 64, s8 +; GCN-NEXT: s_sub_i32 s2, s8, 64 +; GCN-NEXT: s_lshl_b64 s[0:1], s[6:7], s8 +; GCN-NEXT: s_lshr_b64 s[10:11], s[4:5], s9 +; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], s2 +; GCN-NEXT: s_or_b64 s[10:11], s[0:1], s[10:11] +; GCN-NEXT: s_cmp_lt_u32 s8, 64 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: s_cmp_eq_u32 s8, 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GCN-NEXT: v_cndmask_b32_e64 v3, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] +; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], s8 +; GCN-NEXT: v_mov_b32_e32 v0, s1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = shl i128 %lhs, %rhs @@ -216,29 +222,35 @@ define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) { define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) { ; GCN-LABEL: s_lshr_i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s5, s4, 64 -; GCN-NEXT: s_sub_i32 s12, 64, s4 -; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 -; GCN-NEXT: s_lshr_b64 s[8:9], s[2:3], s4 -; GCN-NEXT: s_lshr_b64 s[10:11], s[2:3], s5 -; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], s12 -; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3] -; 
GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s2, s2, s10 -; GCN-NEXT: s_cselect_b32 s3, s3, s11 -; GCN-NEXT: s_cselect_b32 s5, s9, 0 -; GCN-NEXT: s_cselect_b32 s6, s8, 0 -; GCN-NEXT: s_cmp_eq_u32 s4, 0 -; GCN-NEXT: s_cselect_b32 s1, s1, s3 -; GCN-NEXT: s_cselect_b32 s0, s0, s2 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_sub_i32 s9, 64, s8 +; GCN-NEXT: s_sub_i32 s2, s8, 64 +; GCN-NEXT: s_lshr_b64 s[0:1], s[4:5], s8 +; GCN-NEXT: s_lshl_b64 s[10:11], s[6:7], s9 +; GCN-NEXT: s_lshr_b64 s[2:3], s[6:7], s2 +; GCN-NEXT: s_or_b64 s[10:11], s[0:1], s[10:11] +; GCN-NEXT: s_cmp_lt_u32 s8, 64 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: s_cmp_eq_u32 s8, 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v2, s10 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 +; GCN-NEXT: v_mov_b32_e32 v2, s1 +; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = lshr i128 %lhs, %rhs @@ -249,30 +261,37 @@ define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) { define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) { ; GCN-LABEL: s_ashr_i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v4, 0 -; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s5, 64, s4 -; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 -; GCN-NEXT: s_sub_i32 s10, s4, 64 -; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s5 -; GCN-NEXT: s_ashr_i32 s12, s3, 31 -; GCN-NEXT: s_ashr_i64 s[10:11], s[2:3], s10 -; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s4 -; GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s3, s3, s12 -; GCN-NEXT: s_cselect_b32 s2, s2, s12 -; GCN-NEXT: s_cselect_b32 s5, s6, s10 -; GCN-NEXT: s_cselect_b32 s6, s7, s11 -; GCN-NEXT: s_cmp_eq_u32 s4, 0 -; GCN-NEXT: s_cselect_b32 s1, s1, s6 -; GCN-NEXT: s_cselect_b32 s0, s0, s5 -; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_ashr_i32 s2, s7, 31 +; GCN-NEXT: s_ashr_i64 s[0:1], s[6:7], s8 +; GCN-NEXT: s_cmp_lt_u32 s8, 64 +; GCN-NEXT: v_mov_b32_e32 v0, s2 ; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: s_sub_i32 s0, s8, 64 +; GCN-NEXT: s_ashr_i64 s[2:3], s[6:7], s0 +; GCN-NEXT: s_sub_i32 s0, 64, s8 +; GCN-NEXT: s_lshl_b64 s[0:1], s[6:7], s0 +; GCN-NEXT: s_lshr_b64 s[6:7], s[4:5], s8 +; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: s_cmp_eq_u32 s8, 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: 
v_mov_b32_e32 v4, s6 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v6, s4 +; GCN-NEXT: v_mov_b32_e32 v4, 0 +; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = ashr i128 %lhs, %rhs @@ -432,69 +451,66 @@ define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_shl_v2i128ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx16 s[0:15], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v6, 16 -; GCN-NEXT: v_mov_b32_e32 v4, 0 -; GCN-NEXT: v_mov_b32_e32 v7, 0 -; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 64 -; GCN-NEXT: s_lshr_b64 s[22:23], s[0:1], s22 -; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshl_b64 s[18:19], s[2:3], s8 -; GCN-NEXT: s_lshl_b64 s[20:21], s[0:1], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s3, s19 -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s3, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s2, s3 -; GCN-NEXT: s_and_b64 s[2:3], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshr_b64 s[18:19], s[4:5], s18 -; GCN-NEXT: s_lshl_b64 s[20:21], s[6:7], s12 -; GCN-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 -; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s7, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s7, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s6, s7 -; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 -; GCN-NEXT: s_and_b64 s[6:7], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s6, s1, 0 -; GCN-NEXT: s_cselect_b32 s7, s0, 0 -; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], s12 -; GCN-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s1, s1, 0 -; GCN-NEXT: s_cselect_b32 s0, s0, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s10 -; GCN-NEXT: v_mov_b32_e32 v3, s13 -; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s7 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[14:15], 0 +; GCN-NEXT: s_sub_i32 s22, 64, s12 +; GCN-NEXT: s_sub_i32 s20, s12, 64 +; GCN-NEXT: s_lshr_b64 s[22:23], s[4:5], s22 +; GCN-NEXT: s_lshl_b64 s[24:25], s[6:7], s12 +; GCN-NEXT: s_lshl_b64 s[20:21], s[4:5], s20 +; GCN-NEXT: 
s_or_b64 s[22:23], s[24:25], s[22:23] +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[12:13], s[14:15] +; GCN-NEXT: v_mov_b32_e32 v0, s21 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s7 +; GCN-NEXT: v_cndmask_b32_e64 v3, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s20 +; GCN-NEXT: v_mov_b32_e32 v1, s22 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GCN-NEXT: v_mov_b32_e32 v1, s6 -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_mov_b32_e32 v3, s9 -; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCN-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s13, 64, s16 +; GCN-NEXT: s_sub_i32 s6, s16, 64 +; GCN-NEXT: s_lshr_b64 s[14:15], s[8:9], s13 +; GCN-NEXT: s_lshl_b64 s[20:21], s[10:11], s16 +; GCN-NEXT: s_lshl_b64 s[6:7], s[8:9], s6 +; GCN-NEXT: s_or_b64 s[14:15], s[20:21], s[14:15] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[16:17], s[18:19] +; GCN-NEXT: v_mov_b32_e32 v0, s7 +; GCN-NEXT: v_mov_b32_e32 v1, s15 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: v_cndmask_b32_e64 v7, v0, v1, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: v_mov_b32_e32 v1, s14 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: v_cndmask_b32_e64 v6, v0, v1, s[2:3] +; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], s12 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_lshl_b64 s[2:3], s[8:9], s16 +; GCN-NEXT: v_mov_b32_e32 v4, s3 +; GCN-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GCN-NEXT: s_endpgm %shift = shl <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null @@ -504,69 +520,66 @@ define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_lshr_v2i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx16 s[0:15], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v6, 16 -; GCN-NEXT: v_mov_b32_e32 v4, 0 -; GCN-NEXT: v_mov_b32_e32 v7, 0 -; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 64 -; GCN-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 -; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshr_b64 s[18:19], s[0:1], s8 -; GCN-NEXT: s_lshr_b64 s[20:21], s[2:3], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s1, s19 -; 
GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s1, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s0, s1 -; GCN-NEXT: s_and_b64 s[0:1], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; GCN-NEXT: s_lshr_b64 s[20:21], s[4:5], s12 -; GCN-NEXT: s_lshr_b64 s[10:11], s[6:7], s10 -; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s5, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s5, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s4, s5 -; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s8 -; GCN-NEXT: s_and_b64 s[4:5], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s4, s3, 0 -; GCN-NEXT: s_cselect_b32 s5, s2, 0 -; GCN-NEXT: s_lshr_b64 s[2:3], s[6:7], s12 -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s3, 0 -; GCN-NEXT: s_cselect_b32 s1, s2, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mov_b32_e32 v1, s13 -; GCN-NEXT: v_mov_b32_e32 v2, s1 -; GCN-NEXT: v_mov_b32_e32 v3, s0 -; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s22 -; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[14:15], 0 +; GCN-NEXT: s_sub_i32 s22, 64, s12 +; GCN-NEXT: s_sub_i32 s20, s12, 64 +; GCN-NEXT: s_lshl_b64 s[22:23], s[6:7], s22 +; GCN-NEXT: s_lshr_b64 s[24:25], s[4:5], s12 +; GCN-NEXT: s_lshr_b64 s[20:21], s[6:7], s20 +; GCN-NEXT: s_or_b64 s[22:23], s[24:25], s[22:23] +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[12:13], s[14:15] +; GCN-NEXT: v_mov_b32_e32 v0, s21 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s20 +; GCN-NEXT: v_mov_b32_e32 v2, s22 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s13, 64, s16 +; GCN-NEXT: s_sub_i32 s4, s16, 64 +; GCN-NEXT: s_lshl_b64 s[14:15], s[10:11], s13 +; GCN-NEXT: s_lshr_b64 s[20:21], s[8:9], s16 +; GCN-NEXT: s_lshr_b64 s[4:5], s[10:11], s4 +; GCN-NEXT: s_or_b64 s[14:15], s[20:21], s[14:15] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[16:17], s[18:19] ; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s4 -; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCN-NEXT: v_mov_b32_e32 v3, s15 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s9 +; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s14 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s8 +; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, s[2:3] +; GCN-NEXT: 
s_lshr_b64 s[2:3], s[6:7], s12 +; GCN-NEXT: v_mov_b32_e32 v2, s3 +; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: s_lshr_b64 s[2:3], s[10:11], s16 +; GCN-NEXT: v_mov_b32_e32 v6, s3 +; GCN-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v6, s2 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GCN-NEXT: s_endpgm %shift = lshr <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null @@ -576,71 +589,70 @@ define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { ; GCN-LABEL: s_ashr_v2i128_ss: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx16 s[0:15], s[4:5], 0x0 -; GCN-NEXT: v_mov_b32_e32 v6, 16 -; GCN-NEXT: v_mov_b32_e32 v4, 0 -; GCN-NEXT: v_mov_b32_e32 v7, 0 -; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v10, 16 +; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v11, 0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 64 -; GCN-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 -; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshr_b64 s[18:19], s[0:1], s8 -; GCN-NEXT: s_ashr_i64 s[20:21], s[2:3], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s1, s19 -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s1, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s0, s1 -; GCN-NEXT: s_and_b64 s[0:1], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; GCN-NEXT: s_lshr_b64 s[20:21], s[4:5], s12 -; GCN-NEXT: s_ashr_i64 s[10:11], s[6:7], s10 -; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s5, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s5, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s4, s5 -; GCN-NEXT: s_ashr_i32 s11, s3, 31 -; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s8 -; GCN-NEXT: s_and_b64 s[4:5], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s4, s3, s11 -; GCN-NEXT: s_cselect_b32 s5, s2, s11 -; GCN-NEXT: s_ashr_i32 s8, s7, 31 -; GCN-NEXT: s_ashr_i64 s[2:3], s[6:7], s12 -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s3, s8 -; GCN-NEXT: s_cselect_b32 s1, s2, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mov_b32_e32 v1, s13 -; GCN-NEXT: v_mov_b32_e32 v2, s1 -; GCN-NEXT: v_mov_b32_e32 v3, s0 -; GCN-NEXT: flat_store_dwordx4 v[6:7], 
v[0:3] -; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s22 -; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[14:15], 0 +; GCN-NEXT: s_sub_i32 s22, 64, s12 +; GCN-NEXT: s_sub_i32 s20, s12, 64 +; GCN-NEXT: s_lshl_b64 s[22:23], s[6:7], s22 +; GCN-NEXT: s_lshr_b64 s[24:25], s[4:5], s12 +; GCN-NEXT: s_ashr_i64 s[20:21], s[6:7], s20 +; GCN-NEXT: s_or_b64 s[22:23], s[24:25], s[22:23] +; GCN-NEXT: s_and_b64 vcc, s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[12:13], s[14:15] +; GCN-NEXT: v_mov_b32_e32 v0, s21 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: v_cmp_eq_u64_e64 s[0:1], s[0:1], 0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s20 +; GCN-NEXT: v_mov_b32_e32 v2, s22 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[16:17], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[18:19], 0 +; GCN-NEXT: s_sub_i32 s13, 64, s16 +; GCN-NEXT: s_sub_i32 s4, s16, 64 +; GCN-NEXT: s_lshl_b64 s[14:15], s[10:11], s13 +; GCN-NEXT: s_lshr_b64 s[20:21], s[8:9], s16 +; GCN-NEXT: s_ashr_i64 s[4:5], s[10:11], s4 +; GCN-NEXT: s_or_b64 s[14:15], s[20:21], s[14:15] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[16:17], s[18:19] ; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s4 -; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCN-NEXT: v_mov_b32_e32 v3, s15 +; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s9 +; GCN-NEXT: v_cndmask_b32_e64 v5, v2, v3, s[2:3] +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s14 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, s8 +; GCN-NEXT: v_cndmask_b32_e64 v4, v2, v3, s[2:3] +; GCN-NEXT: s_ashr_i32 s4, s7, 31 +; GCN-NEXT: s_ashr_i64 s[2:3], s[6:7], s12 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: v_mov_b32_e32 v6, s2 +; GCN-NEXT: s_ashr_i32 s4, s11, 31 +; GCN-NEXT: s_ashr_i64 s[2:3], s[10:11], s16 +; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GCN-NEXT: v_mov_b32_e32 v6, s4 +; GCN-NEXT: v_mov_b32_e32 v7, s3 +; GCN-NEXT: v_mov_b32_e32 v12, s2 +; GCN-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[0:1] +; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GCN-NEXT: s_endpgm %shift = ashr <2 x i128> %lhs, %rhs store <2 x i128> %shift, <2 x i128> addrspace(1)* null diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir index b99ca2b9fd327..9775c45310036 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -11,7 +11,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc - ; GCN-NEXT: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc + ; GCN-NEXT: dead [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 %0:sgpr_64 = COPY 
$sgpr4_sgpr5 %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 @@ -28,9 +28,9 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc - ; GCN-NEXT: dead %0:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], undef %1:sreg_64, implicit-def dead $scc + ; GCN-NEXT: dead [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: S_BRANCH %bb.1 @@ -68,9 +68,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -79,8 +79,8 @@ body: | ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: S_ENDPGM 0 @@ -117,9 +117,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -129,9 +129,9 @@ body: | ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: 
$vgpr0, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -179,9 +179,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -191,8 +191,8 @@ body: | ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -242,9 +242,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -253,8 +253,8 @@ body: | ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: 
%bb.2(0x80000000) @@ -302,9 +302,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -314,9 +314,9 @@ body: | ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]] ; GCN-NEXT: $sgpr4_sgpr5 = S_MOV_B64 32 - ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) @@ -369,7 +369,7 @@ body: | ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec - ; GCN-NEXT: dead %5:sreg_64_xexec = S_MOV_B64 0 + ; GCN-NEXT: dead [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x80000000) @@ -380,11 +380,11 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc - ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_64 = PRED_COPY $exec, implicit-def $exec + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[PRED_COPY]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc + ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[PRED_COPY]], implicit-def dead $scc ; GCN-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]] - ; GCN-NEXT: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec + ; GCN-NEXT: dead [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll index cbe66a1a0414a..fdba0c4cc83c6 100644 --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -6,16 +6,17 @@ ; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000 ; Make sure we are handling hazards correctly. 
-; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 +; SGPR: v_mov_b32_e32 v0, vcc_lo +; SGPR-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 +; SGPR-NEXT: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; 4-byte Folded Reload +; SGPR-NEXT: s_mov_b64 exec, [[EXEC_COPY]] ; SGPR-NEXT: s_waitcnt vmcnt(0) ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0 ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1 ; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2 ; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3 -; SGPR-NEXT: buffer_load_dword [[VHI]], off, s[96:99], 0 -; SGPR-NEXT: s_waitcnt vmcnt(0) -; SGPR-NEXT: s_mov_b64 exec, s[4:5] -; SGPR-NEXT: s_nop 1 +; SGPR-NEXT: ; kill: killed $vgpr1 +; SGPR-NEXT: s_nop 4 ; SGPR-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; ALL: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 4239f52e7aa34..c89f00abea15d 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -198,10 +198,12 @@ entry: ; Have another non-tail in the function ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call: -; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 -; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 +; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 [[CSRV]], s33, 2 +; GCN: v_writelane_b32 [[CSRV]], [[FP_SCRATCH_COPY]], 2 ; GCN-DAG: s_addk_i32 s32, 0x400 ; GCN-DAG: s_getpc_b64 s[4:5] @@ -219,18 +221,18 @@ entry: ; GCN-DAG: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN: s_getpc_b64 s[4:5] +; GCN: v_readlane_b32 s30, [[CSRV]], 0 +; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 -; GCN-DAG: v_readlane_b32 s30, [[CSRV]], 0 -; GCN-DAG: v_readlane_b32 s31, [[CSRV]], 1 - -; GCN: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_readlane_b32 s31, [[CSRV]], 1 +; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[CSRV]], 2 +; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GCN-NEXT: buffer_load_dword [[CSRV]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: s_addk_i32 s32, 0xfc00 +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_setpc_b64 s[4:5] define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index af290aa914adb..c5ceb23337577 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -15,12 +15,19 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 ; uses an SGPR (implicit vcc). 
; GCN-LABEL: {{^}}sint_to_fp_i1_f64: -; GCN-DAG: s_cmp_eq_u32 -; GCN-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0 -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] -; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]] -; GCN: s_endpgm +; VI-DAG: s_cmp_eq_u32 +; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0 +; VI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] +; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]] +; VI: s_endpgm + +; CI-DAG: s_cmp_eq_u32 +; CI-DAG: s_cselect_b64 vcc, -1, 0 +; CI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}, vcc +; CI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; CI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]] +; CI: s_endpgm define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0 %fp = sitofp i1 %cmp to double diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index 0b48ce04dce4d..6c2da603fd86b 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -201,7 +201,7 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr64 ; GFX90A-SPILLED: bb.0: @@ -245,7 +245,7 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; SPILLED-LABEL: name: spill_restore_agpr64 ; SPILLED: bb.0: @@ -556,7 +556,7 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -570,8 +570,8 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr96 ; GFX90A-SPILLED: bb.0: @@ -604,7 +604,7 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -618,8 +618,8 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; SPILLED-LABEL: name: spill_restore_agpr96 ; SPILLED: bb.0: @@ -703,8 +703,8 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -718,9 +718,9 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GFX908-EXPANDED-NEXT: $agpr1 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr128 ; GFX90A-SPILLED: bb.0: @@ -755,8 +755,8 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -770,9 +770,9 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; SPILLED-LABEL: name: spill_restore_agpr128 ; SPILLED: bb.0: @@ -860,9 +860,9 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed 
$agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -876,10 +876,10 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr160 ; GFX90A-SPILLED: bb.0: @@ -916,9 +916,9 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -932,10 +932,10 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; SPILLED-LABEL: name: spill_restore_agpr160 ; SPILLED: bb.0: @@ -1027,10 +1027,10 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -1044,11 +1044,11 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr192 ; GFX90A-SPILLED: bb.0: @@ -1087,10 +1087,10 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr5_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -1104,11 +1104,11 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; SPILLED-LABEL: name: spill_restore_agpr192 ; SPILLED: bb.0: @@ -1206,12 +1206,12 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -1225,13 +1225,13 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit 
killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr256 ; GFX90A-SPILLED: bb.0: @@ -1274,12 +1274,12 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -1293,13 +1293,13 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; 
GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; SPILLED-LABEL: name: spill_restore_agpr256 ; SPILLED: bb.0: @@ -1417,20 +1417,20 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -1444,21 +1444,21 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr512 ; GFX90A-SPILLED: bb.0: @@ -1517,20 +1517,20 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -1544,21 +1544,21 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; SPILLED-LABEL: name: spill_restore_agpr512 ; SPILLED: bb.0: @@ -1724,36 +1724,36 @@ body: | ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX908-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed 
$agpr6, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED-NEXT: {{ $}} @@ -1767,37 +1767,37 @@ body: | ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX908-EXPANDED-NEXT: {{ $}} ; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-EXPANDED-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX908-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; 
GFX908-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX908-EXPANDED-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-SPILLED-LABEL: name: spill_restore_agpr1024 ; GFX90A-SPILLED: bb.0: @@ -1888,36 +1888,36 @@ body: | ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31_lo16 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX90A-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed 
$agpr13, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec + ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED-NEXT: {{ $}} @@ -1931,37 +1931,37 @@ body: | ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX90A-EXPANDED-NEXT: {{ $}} ; GFX90A-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-EXPANDED-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX90A-EXPANDED-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr30, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr29, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr28, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr27, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr26, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr25, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr24, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr23, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr22, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr21, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr20, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr19, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr18, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr17, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr16, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr18 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec + ; GFX90A-EXPANDED-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; SPILLED-LABEL: name: spill_restore_agpr1024 ; SPILLED: bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll index 0b5109b7270bd..89e0886592459 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -1,19 +1,25 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}spill_csr_s5_copy: -; GCN: s_or_saveexec_b64 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN: s_xor_saveexec_b64 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 v40, s33, 3 +; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4 ; GCN: s_swappc_b64 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; GCN: v_readlane_b32 s33, v40, 3 -; GCN: s_or_saveexec_b64 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4 +; GCN: s_xor_saveexec_b64 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_mov_b64 exec +; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 define void @spill_csr_s5_copy() #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll index 8077a0b6adfbd..d93ce686f2ace 100644 --- 
a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -78,8 +78,8 @@ entry: ; 0x40000 / 64 = 4096 (for wave64) %a = load volatile i32, i32 addrspace(5)* %aptr - ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 - ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Spill + ; MUBUF: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x40100 + ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], [[SOFF]] ; 4-byte Folded Spill ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004 ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill @@ -97,8 +97,8 @@ entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 - ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 - ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Reload + ; MUBUF: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x40100 + ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], [[SOFF]] ; 4-byte Folded Reload ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004 ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload @@ -129,8 +129,8 @@ entry: ; 0x40000 / 64 = 4096 (for wave64) %a = load volatile i32, i32 addrspace(5)* %aptr - ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 - ; MUBUF: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Spill + ; MUBUF: s_mov_b32 [[SOFF:s[0-9]+]], 0x40100 + ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], [[SOFF]] ; 4-byte Folded Spill ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004 ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill @@ -148,8 +148,8 @@ entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 - ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 - ; MUBUF: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Reload + ; MUBUF: s_mov_b32 [[SOFF:s[0-9]+]], 0x40100 + ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], [[SOFF]] ; 4-byte Folded Reload ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004 ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload @@ -336,5 +336,5 @@ entry: attributes #0 = { nounwind } attributes #1 = { nounwind "amdgpu-num-sgpr"="17" "amdgpu-num-vgpr"="8" } -attributes #2 = { nounwind "amdgpu-num-sgpr"="14" "amdgpu-num-vgpr"="8" } -attributes #3 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" } +attributes #2 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" } +attributes #3 = { nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" } diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index 2b056e56d632a..73a987dba0ced 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # Make sure the initial first $sgpr1 = COPY $sgpr2 copy is not deleted # by the copy 
propagation after lowering the spill. @@ -30,17 +30,19 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8_lo16 - ; GCN-NEXT: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr1 - ; GCN-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1 + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr8 @@ -75,16 +77,18 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3_lo16 - ; GCN-NEXT: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; GCN-NEXT: S_ENDPGM 0 @@ -114,12 +118,12 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16 - ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2 + ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) - ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr1 + ; GCN-NEXT: renamable $vgpr8 = COPY $vgpr2, implicit $exec ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr8 renamable $vgpr1 = COPY $vgpr2 SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) @@ -147,11 +151,11 @@ body: | ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 - ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 
implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) ; GCN-NEXT: S_ENDPGM 0 renamable $vgpr1 = COPY $vgpr2 SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index e0fc1e19b1677..646d201f33211 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX6 %s ; RUN: llc -sgpr-regalloc=basic -vgpr-regalloc=basic -march=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -mattr=-xnack,+enable-flat-scratch -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX9-FLATSCR,FLATSCR %s @@ -12,17 +13,10044 @@ ; mechanism works even when many spills happen. ; Just test that it compiles successfully. 
-; CHECK-LABEL: test - -; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}} -; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill -; GFX9-FLATSCR: ;;#ASMSTART -; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}} -; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload - -; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill -; GFX10-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, off offset:{{[0-9]+}} ; 16-byte Folded Reload define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) { +; GFX6-LABEL: test: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_mov_b32 s44, SCRATCH_RSRC_DWORD0 +; GFX6-NEXT: s_mov_b32 s45, SCRATCH_RSRC_DWORD1 +; GFX6-NEXT: s_mov_b32 s46, -1 +; GFX6-NEXT: s_mov_b32 s47, 0xe8f000 +; GFX6-NEXT: s_add_u32 s44, s44, s3 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v5, 13, v0 +; GFX6-NEXT: s_mov_b32 s18, 0 +; GFX6-NEXT: s_mov_b32 s19, 0xf000 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v5 +; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX6-NEXT: s_movk_i32 s4, 0x80 +; GFX6-NEXT: s_mov_b32 s5, s18 +; GFX6-NEXT: s_mov_b64 s[6:7], s[18:19] +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3968 +; GFX6-NEXT: s_addc_u32 s45, s45, 0 +; GFX6-NEXT: s_movk_i32 s8, 0x100 +; GFX6-NEXT: s_mov_b32 s9, s18 +; GFX6-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX6-NEXT: s_movk_i32 s12, 0x180 +; GFX6-NEXT: s_mov_b32 s13, s18 +; GFX6-NEXT: s_mov_b64 s[14:15], s[18:19] +; GFX6-NEXT: s_movk_i32 s20, 0x200 +; GFX6-NEXT: s_mov_b32 s21, s18 +; GFX6-NEXT: s_mov_b64 s[22:23], s[18:19] +; GFX6-NEXT: s_movk_i32 s24, 0x280 +; GFX6-NEXT: s_mov_b32 s25, s18 +; GFX6-NEXT: s_mov_b64 s[26:27], s[18:19] +; GFX6-NEXT: s_movk_i32 s28, 0x300 +; GFX6-NEXT: s_mov_b32 s29, s18 +; GFX6-NEXT: s_mov_b64 s[30:31], s[18:19] +; GFX6-NEXT: s_movk_i32 s36, 0x380 +; GFX6-NEXT: s_mov_b32 s37, s18 +; GFX6-NEXT: s_mov_b64 s[38:39], s[18:19] +; GFX6-NEXT: s_movk_i32 s40, 0x400 +; GFX6-NEXT: s_mov_b32 s41, s18 +; GFX6-NEXT: s_mov_b64 s[42:43], s[18:19] +; GFX6-NEXT: s_mov_b64 s[16:17], s[2:3] +; GFX6-NEXT: v_mov_b32_e32 v6, 0 +; GFX6-NEXT: s_mov_b32 s33, 0x3fd00 +; GFX6-NEXT: s_mov_b64 s[2:3], s[18:19] +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1268 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1272 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1276 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1280 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1300 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1304 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1308 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1312 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 
addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1332 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1336 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1340 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1344 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1364 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1368 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1372 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1376 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1396 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1400 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1404 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1408 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1428 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1432 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1436 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1440 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1460 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1464 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1468 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1472 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1492 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1496 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1500 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1504 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1556 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1560 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1564 ; 4-byte Folded Spill +; 
GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1568 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1588 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1592 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1596 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1600 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1620 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1624 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1628 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1632 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1652 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1656 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1660 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1664 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1684 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1688 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1692 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1696 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1716 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1720 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1724 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1728 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1748 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1752 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1756 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1760 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1780 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt 
vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1784 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1788 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1792 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1860 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1864 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1868 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1872 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1892 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1896 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1900 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1904 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1924 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1928 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1932 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1936 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1956 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1960 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1964 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1968 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1988 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1992 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1996 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2000 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2020 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2024 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2028 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2032 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 
v[7:10], v[0:1], s[12:15], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2052 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2056 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2060 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2064 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2084 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2088 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2092 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2096 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2148 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2152 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2156 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2160 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2180 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2184 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2188 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2192 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2212 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2216 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2220 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2224 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2244 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2248 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2252 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2256 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2276 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2280 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 
offset:2284 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2288 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2308 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2312 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2316 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2320 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2340 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2344 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2348 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2352 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2372 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2380 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2384 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2452 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2456 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2464 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2484 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2488 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2496 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2516 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2520 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2524 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2528 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2548 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2552 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2560 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2580 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2584 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2588 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2592 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2612 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2616 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2620 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2624 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2644 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2648 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2652 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2656 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2676 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2680 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2684 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2688 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2740 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2744 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2748 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2752 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2772 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2776 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2780 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2784 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt 
expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2804 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2808 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2812 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2816 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2836 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2840 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2844 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2848 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2868 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2872 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2876 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2880 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2900 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2904 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2908 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2912 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2932 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2936 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2940 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2944 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2964 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2968 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2972 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2976 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3044 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3048 ; 4-byte Folded Spill +; 
GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3052 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3056 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3076 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3080 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3084 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3088 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3108 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3112 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3120 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3140 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3144 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3148 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3152 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3172 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3176 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3180 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3184 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3204 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3208 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3212 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3216 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3236 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3240 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3244 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3248 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
buffer_store_dword v7, off, s[44:47], 0 offset:3268 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3272 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3276 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3280 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3332 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3336 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3340 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3344 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3364 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3368 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3372 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3376 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3396 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3400 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3404 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3408 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3428 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3432 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3436 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3440 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3460 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3464 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3468 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3472 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3492 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3496 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3500 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 
offset:3504 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3524 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3528 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3532 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[40:43], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(3) +; GFX6-NEXT: v_add_i32_e32 v7, vcc, s0, v5 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3556 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3560 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3564 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3568 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:16 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:32 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:48 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:64 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:72 
; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:160 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:176 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 
0 offset:180 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:192 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:208 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:224 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:240 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:256 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:272 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: 
buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:288 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:304 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:320 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:336 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:352 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:368 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:384 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded 
Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:416 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:432 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:448 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:464 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:480 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:496 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
buffer_store_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:512 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:528 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:544 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:560 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:576 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:592 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:608 +; GFX6-NEXT: s_waitcnt 
vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:624 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:640 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:656 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:672 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:688 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:704 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded 
Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:720 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:736 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:752 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:768 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:784 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:816 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Spill +; GFX6-NEXT: 
buffer_store_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:832 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:848 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:864 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:880 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:896 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:912 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:928 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:932 ; 4-byte 
Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:944 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:960 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:976 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:992 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1008 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1024 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: 
buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1040 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1056 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1072 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1088 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1104 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1120 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1136 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, 
s[44:47], 0 offset:1148 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1152 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1168 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1184 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1200 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1216 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1232 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1248 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte 
Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1264 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1280 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1296 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1312 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1328 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1344 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1360 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1376 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1392 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1408 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1424 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1440 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1456 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, 
off, s[44:47], 0 offset:1644 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1472 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1488 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1504 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1520 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1536 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1552 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1568 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1828 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1584 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1600 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1616 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1632 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1648 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1664 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1680 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1696 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1712 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1728 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1744 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1760 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1776 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, 
off, s[44:47], 0 offset:2204 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1792 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1808 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1824 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1840 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1856 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1872 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1888 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2404 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1904 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1920 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1936 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1952 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1968 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1984 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2016 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2032 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2048 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2064 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2080 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2096 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, 
off, s[44:47], 0 offset:2764 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2112 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2128 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2144 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2160 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2176 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2192 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2208 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2980 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2224 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2240 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2256 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2272 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2288 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2304 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2320 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2336 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2352 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2368 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2384 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2416 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, 
off, s[44:47], 0 offset:3324 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2432 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2448 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2464 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2480 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2496 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2512 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2528 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3540 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2544 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2560 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2576 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2592 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2608 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2624 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2640 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2656 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2672 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2688 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2704 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2720 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2736 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, 
off, s[44:47], 0 offset:3772 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2752 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2768 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2784 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2816 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2832 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2848 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3876 ; 
4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2864 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2880 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2896 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2912 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2928 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2944 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; 
GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2960 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2976 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2992 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3008 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3024 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3040 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3056 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, 
s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3072 +; GFX6-NEXT: s_mov_b32 s33, 0x40100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3088 +; GFX6-NEXT: s_mov_b32 s33, 0x40500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3104 +; GFX6-NEXT: s_mov_b32 s33, 0x40900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3120 +; GFX6-NEXT: s_mov_b32 s33, 0x40d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3136 +; GFX6-NEXT: s_mov_b32 s33, 0x41100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3152 +; GFX6-NEXT: s_mov_b32 s33, 0x41500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 
v[0:3], v[5:6], s[16:19], 0 addr64 offset:3168 +; GFX6-NEXT: s_mov_b32 s33, 0x41900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3184 +; GFX6-NEXT: s_mov_b32 s33, 0x41d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3200 +; GFX6-NEXT: s_mov_b32 s33, 0x42100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3216 +; GFX6-NEXT: s_mov_b32 s33, 0x42500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3232 +; GFX6-NEXT: s_mov_b32 s33, 0x42900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3248 +; GFX6-NEXT: s_mov_b32 s33, 0x42d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3264 +; GFX6-NEXT: s_mov_b32 s33, 0x43100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; 
GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3280 +; GFX6-NEXT: s_mov_b32 s33, 0x43500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3296 +; GFX6-NEXT: s_mov_b32 s33, 0x43900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3312 +; GFX6-NEXT: s_mov_b32 s33, 0x43d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3328 +; GFX6-NEXT: s_mov_b32 s33, 0x44100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3344 +; GFX6-NEXT: s_mov_b32 s33, 0x44500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3360 +; GFX6-NEXT: s_mov_b32 s33, 0x44900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; 
GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3376 +; GFX6-NEXT: s_mov_b32 s33, 0x44d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3392 +; GFX6-NEXT: s_mov_b32 s33, 0x45100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3408 +; GFX6-NEXT: s_mov_b32 s33, 0x45500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3424 +; GFX6-NEXT: s_mov_b32 s33, 0x45900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3440 +; GFX6-NEXT: s_mov_b32 s33, 0x45d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3456 +; GFX6-NEXT: s_mov_b32 s33, 0x46100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3472 +; 
GFX6-NEXT: s_mov_b32 s33, 0x46500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3488 +; GFX6-NEXT: s_mov_b32 s33, 0x46900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3504 +; GFX6-NEXT: s_mov_b32 s33, 0x46d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3520 +; GFX6-NEXT: s_mov_b32 s33, 0x47100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3536 +; GFX6-NEXT: s_mov_b32 s33, 0x47500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3552 +; GFX6-NEXT: s_mov_b32 s33, 0x47900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3568 +; GFX6-NEXT: s_mov_b32 s33, 0x47d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3584 +; GFX6-NEXT: s_mov_b32 s33, 0x48100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3600 +; GFX6-NEXT: s_mov_b32 s33, 0x48500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3616 +; GFX6-NEXT: s_mov_b32 s33, 0x48900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3632 +; GFX6-NEXT: s_mov_b32 s33, 0x48d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3648 +; GFX6-NEXT: s_mov_b32 s33, 0x49100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3664 +; GFX6-NEXT: s_mov_b32 s33, 0x49500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, 
s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3680 +; GFX6-NEXT: s_mov_b32 s33, 0x49900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3696 +; GFX6-NEXT: s_mov_b32 s33, 0x49d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3712 +; GFX6-NEXT: s_mov_b32 s33, 0x4a100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3728 +; GFX6-NEXT: s_mov_b32 s33, 0x4a500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3744 +; GFX6-NEXT: s_mov_b32 s33, 0x4a900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3760 +; GFX6-NEXT: s_mov_b32 s33, 0x4ad00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3776 +; GFX6-NEXT: s_mov_b32 s33, 0x4b100 +; 
GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3792 +; GFX6-NEXT: s_mov_b32 s33, 0x4b500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3808 +; GFX6-NEXT: s_mov_b32 s33, 0x4b900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3824 +; GFX6-NEXT: s_mov_b32 s33, 0x4bd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3840 +; GFX6-NEXT: s_mov_b32 s33, 0x4c100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3856 +; GFX6-NEXT: s_mov_b32 s33, 0x4c500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3872 +; GFX6-NEXT: s_mov_b32 s33, 0x4c900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 
; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3888 +; GFX6-NEXT: s_mov_b32 s33, 0x4cd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3904 +; GFX6-NEXT: s_mov_b32 s33, 0x4d100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3920 +; GFX6-NEXT: s_mov_b32 s33, 0x4d500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3936 +; GFX6-NEXT: s_mov_b32 s33, 0x4d900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3952 +; GFX6-NEXT: s_mov_b32 s33, 0x4dd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3968 +; GFX6-NEXT: s_mov_b32 s33, 0x4e100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; 
GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3984 +; GFX6-NEXT: s_mov_b32 s33, 0x4e500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4000 +; GFX6-NEXT: s_mov_b32 s33, 0x4e900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4016 +; GFX6-NEXT: s_mov_b32 s33, 0x4ed00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4032 +; GFX6-NEXT: s_mov_b32 s33, 0x4f100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4048 +; GFX6-NEXT: s_mov_b32 s33, 0x4f500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4064 +; GFX6-NEXT: s_mov_b32 s33, 0x4f900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4080 +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: 
;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3556 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3560 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3564 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3568 ; 4-byte Folded Reload +; GFX6-NEXT: v_mov_b32_e32 v4, s1 +; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3524 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3528 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3532 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3536 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3496 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3504 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3464 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3472 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3432 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3440 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3400 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3408 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3368 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3376 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3336 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3344 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3272 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3280 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3240 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3248 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3208 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3216 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3176 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3184 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3144 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3152 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3112 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v11, off, s[44:47], 0 offset:3116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3120 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3080 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3084 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3088 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3048 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3056 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2968 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2976 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2936 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2944 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2900 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2904 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2908 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2912 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2868 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2872 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2876 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2880 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2836 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2840 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword 
v11, off, s[44:47], 0 offset:2844 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2848 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2804 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2808 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2812 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2816 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2772 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2776 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2780 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2784 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2744 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2752 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2680 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2688 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2648 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2656 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2612 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2616 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2620 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2624 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2580 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2584 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 
0 offset:2588 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2592 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2548 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2552 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2556 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2560 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2516 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2520 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2524 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2528 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2488 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2496 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2456 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2464 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2376 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2384 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2344 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2352 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2312 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2316 ; 
4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2320 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2280 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2288 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2248 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2256 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2216 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2224 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2184 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2192 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2152 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2160 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2084 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2088 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2092 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2096 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2056 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2060 ; 4-byte Folded 
Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2064 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2024 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2032 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1992 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2000 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1960 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1968 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1928 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1936 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1892 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1896 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1900 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1904 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1860 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1864 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1868 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1872 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1780 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1784 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1788 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1792 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1752 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1760 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1720 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1728 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1688 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1696 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1656 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1664 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1620 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1624 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1628 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1632 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1588 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1592 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1596 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1600 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1556 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1560 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1564 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v12, off, s[44:47], 0 offset:1568 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1496 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1504 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1464 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1472 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1432 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1440 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1400 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1408 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1368 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1376 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1336 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1344 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1304 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, 
s[44:47], 0 offset:1312 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1272 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1280 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3968 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080 +; GFX6-NEXT: s_mov_b32 s4, 0x4f900 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4f500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4f100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4ed00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4e900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4e500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 
4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4e100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4dd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4d900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4d500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4d100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4cd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4c900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte 
Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4c500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4c100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4bd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4b900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4b500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4b100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4ad00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776 +; GFX6-NEXT: 
s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4a900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4a500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x4a100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x49d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x49900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x49500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x49100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x48d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x48900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x48500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x48100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x47d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x47900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; 
GFX6-NEXT: s_mov_b32 s4, 0x47500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x47100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x46d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x46900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x46500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x46100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x45d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x45900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x45500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x45100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x44d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x44900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x44500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x44100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x43d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x43900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x43500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x43100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x42d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x42900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x42500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x42100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x41d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x41900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x41500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x41100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x40d00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x40900 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120 +; GFX6-NEXT: s_waitcnt expcnt(0) 
+; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x40500 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x40100 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x3fd00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: 
buffer_load_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte 
Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, 
s[44:47], 0 offset:3800 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte 
Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, 
s[44:47], 0 offset:3360 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Reload +; GFX6-NEXT: 
s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], 
v[5:6], s[0:3], 0 addr64 offset:2208 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096 +; GFX6-NEXT: s_waitcnt 
expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 
0 offset:2532 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte 
Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, 
s[44:47], 0 offset:1948 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Reload +; GFX6-NEXT: 
buffer_load_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1440 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte 
Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 
offset:1184 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: 
buffer_load_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Reload 
+; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Reload +; 
GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Reload +; 
GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 
addr64 offset:496 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, 
s[44:47], 0 offset:372 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 
0 offset:264 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 
offset:156 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded 
Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 +; GFX6-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: test: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 +; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x80 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, s2, v5 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, 4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x100 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0xd4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x180 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x154 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x1d4 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x254 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x2d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, 20 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, 36 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_mov_b32 s6, 52 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, 
v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x74 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0xa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0xb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0xc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x200 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0xe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0xf4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x104 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x114 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x124 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x134 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x280 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x164 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x174 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x184 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x194 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x1a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x1b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x1c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x300 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x1e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x1f4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x204 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 
0x214 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x224 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x234 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x244 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x380 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x264 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x274 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x284 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x294 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x2a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x2b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x2c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x400 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x2e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v2 +; GFX9-FLATSCR-NEXT: 
s_movk_i32 s11, 0x2f4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x304 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x314 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x324 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x334 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x344 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x354 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x364 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v2 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x374 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x384 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x394 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off 
offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4080 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x404 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s1 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x414 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x424 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x434 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x444 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x454 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x464 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x474 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x484 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x494 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, 
v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x504 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x514 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x524 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x534 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x544 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x554 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x564 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x574 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x584 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x594 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5a4 +; GFX9-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x604 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x614 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x624 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x634 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x644 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x654 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x664 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x674 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x684 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x694 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x704 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x714 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x724 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x734 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x744 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x754 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x764 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x774 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; 
GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x784 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x794 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x804 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x814 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x824 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x834 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x844 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x854 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x864 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x874 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x884 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x894 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x904 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x914 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x924 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x934 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x944 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x954 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x964 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x974 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x984 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x994 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill 
+; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xab4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xac4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xad4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xae4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xba4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill 
+; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill 
+; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4 +; GFX9-FLATSCR-NEXT: s_waitcnt 
vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, 
s[2:3] offset:3472 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 
off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 
0x1364 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080 +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4 +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v4, vcc, s0, v5 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1364 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3952 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3936 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3920 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3904 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3888 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3872 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3856 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3840 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3824 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3808 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 
16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3792 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3776 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3760 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3744 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3728 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3712 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3696 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3680 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3664 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3648 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3632 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3616 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3600 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3584 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: 
global_store_dwordx4 v5, v[0:3], s[0:1] offset:3568 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3552 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3536 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3520 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3504 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3488 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3472 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3456 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3440 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3424 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3408 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3392 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3376 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3360 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3344 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3328 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3312 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3296 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3280 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3264 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3248 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3232 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3216 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3200 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3184 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3168 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3152 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3136 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3120 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], 
s[0:1] offset:3104 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3088 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3072 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3056 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3040 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3024 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3008 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2992 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2976 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2960 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2944 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2928 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2912 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2896 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2880 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 +; 
GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2864 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2848 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2832 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2816 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2800 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2784 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2768 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2752 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2736 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2720 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2704 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2688 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2672 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2656 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2640 +; GFX9-FLATSCR-NEXT: 
scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2624 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2608 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2592 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2576 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2560 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2544 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2528 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2512 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2496 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2480 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2464 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2448 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2432 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2416 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2400 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2384 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2368 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2352 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2336 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2320 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2304 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2288 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2272 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2256 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2240 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2224 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2208 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2192 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2176 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded 
Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2160 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2144 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2128 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2112 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2096 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xba4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], 
s[0:1] offset:1936 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xae4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xad4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xac4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xab4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa94 +; 
GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa34 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa24 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa14 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa04 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 +; GFX9-FLATSCR-NEXT: 
scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x994 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x984 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x974 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x964 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x954 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x944 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x934 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x924 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x914 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x904 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x894 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x884 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x874 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x864 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x854 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x844 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x834 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x824 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x814 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x804 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded 
Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x794 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x784 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x774 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x764 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x754 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x744 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x734 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x724 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x714 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x704 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x694 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x684 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x674 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x664 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x654 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x644 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x634 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x624 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x614 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x604 +; GFX9-FLATSCR-NEXT: s_waitcnt 
vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:480 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x594 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x584 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x574 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x564 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x554 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x544 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x534 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x524 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload 
+; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x514 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x504 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x494 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x484 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x474 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x464 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x454 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x444 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x434 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 +; 
GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x424 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x414 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x404 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s11 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x394 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x384 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x374 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x354 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x344 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x334 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x324 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x314 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x304 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x294 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x284 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x274 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: 
global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x254 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x244 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x234 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x224 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x214 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x204 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1f4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1d4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1c4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1b4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1a4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; 
GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x194 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x184 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x174 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x154 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x134 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x124 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x114 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x104 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xf4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xd4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xc4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xb4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xa4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x94 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x84 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x74 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v4 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x64 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4080 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x54 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4064 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x44 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4048 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 52 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4032 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 36 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4016 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 20 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4000 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 4 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3984 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3968 +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX10-FLATSCR-LABEL: test: +; GFX10-FLATSCR: ; %bb.0: ; %entry +; GFX10-FLATSCR-NEXT: s_add_u32 s2, s2, s5 +; GFX10-FLATSCR-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-FLATSCR-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 +; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 +; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s4, s2, v5 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v22, null, s3, 0, s4 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x804 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x80, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x100, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:20 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:36 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:52 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:68 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:84 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:100 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:116 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:132 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:148 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:164 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:180 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:196 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[6:9], off offset:212 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:228 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:244 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v6, vcc_lo, 0x180, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:260 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:276 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:292 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:308 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:324 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:340 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:356 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:372 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v8, vcc_lo, 0x200, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:388 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:404 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:420 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:436 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:452 ; 16-byte Folded 
Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:468 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:484 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:500 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v10, vcc_lo, 0x280, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:516 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:532 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:548 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:564 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:580 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:596 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:612 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:628 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v12, vcc_lo, 0x300, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:644 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:660 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:676 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:692 ; 16-byte Folded Spill +; 
GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:708 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:724 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:740 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:756 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v14, vcc_lo, 0x380, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v15, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:772 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:788 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:804 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:820 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:836 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:852 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:868 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:884 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v16, vcc_lo, 0x400, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:900 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:916 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:932 ; 16-byte Folded Spill +; 
GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:948 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:964 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:980 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:996 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:1012 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v18, vcc_lo, 0x480, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x500, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1028 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1044 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1060 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1076 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1092 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1108 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1124 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1140 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1156 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1172 ; 16-byte Folded Spill 
+; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1188 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1204 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1220 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1236 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1252 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x580, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1268 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1284 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1300 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1316 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1332 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1348 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1364 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1380 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x600, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1396 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1412 ; 16-byte Folded 
Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1428 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1444 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1460 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1476 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1492 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1508 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x680, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1524 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1540 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1556 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1572 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1588 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1604 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1620 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1636 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x700, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1652 ; 16-byte 
Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1668 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1684 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1700 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1716 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1732 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1748 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1764 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x780, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v0 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1780 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1796 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1812 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1828 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1844 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1860 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1876 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1892 ; 
16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[20:21], off offset:2032 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1908 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1924 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1940 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1956 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1972 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1988 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2004 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2020 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v2 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2036 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x814 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x824 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x834 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x844 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x854 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte 
Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x864 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x874 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v6 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v7, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x884 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x894 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v8 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x904 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x914 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x924 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded 
Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x934 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x944 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x954 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x964 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x974 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v10 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v11, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x984 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x994 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v12 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v13, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 
s4, 0xa04 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v14 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v15, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded 
Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v16 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v17, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v18 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v19, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: 
global_load_dwordx4 v[6:9], v[0:1], off offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_clause 0x1 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 +; GFX10-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 
v[0:3], v5, s[2:3] offset:384 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: 
scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 +; 
GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 
; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 +; GFX10-FLATSCR-NEXT: 
s_movk_i32 s4, 0x1124 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 
; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 +; GFX10-FLATSCR-NEXT: 
s_movk_i32 s4, 0x12f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 
; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 +; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s2, s0, v5 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v6, null, s1, 0, s2 +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1394 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1384 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1374 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1364 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1354 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1344 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 
+; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1334 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1324 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1314 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1304 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1294 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1284 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1274 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1264 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: 
s_movk_i32 s2, 0x1254 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1244 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1234 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1224 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1214 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1204 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1194 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1184 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1174 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 
v5, v[0:3], s[0:1] offset:1408 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1164 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1154 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1144 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1134 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1124 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1114 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1104 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10e4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1094 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded 
Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1084 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1074 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1064 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1054 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1044 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1034 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1024 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1014 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1004 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xff4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfe4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfa4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: 
global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xef4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xee4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xed4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xec4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; 
GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xeb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xea4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xde4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], 
s[0:1] offset:480 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xda4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 
s2, 0xce4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xca4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc64 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 +; 
GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbf4 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x480, v4 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x780, v0 +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbe4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbd4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbc4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbb4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xba4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x400, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb64 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x780, v2 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v3, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; 
GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaf4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x380, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xae4 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 0x780, v7 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xad4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xac4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xab4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaa4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa94 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa84 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa74 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 
0x300, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa64 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x780, v9 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v10, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa54 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa44 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa34 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa24 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa14 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa04 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x280, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9e4 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x780, v11 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9a4 +; 
GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x994 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x984 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x974 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x200, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x964 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x780, v13 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v14, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x954 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x944 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x934 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x924 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x914 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x904 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8f4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x180, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8e4 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x780, v15 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v16, vcc_lo +; 
GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8d4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8c4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8b4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8a4 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x894 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x884 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x874 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x100, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x864 +; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x780, v17 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v18, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x854 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x844 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x834 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x824 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x814 +; 
GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x804 +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2036 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x80, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v19 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2020 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2004 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1988 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1972 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1956 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1940 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1924 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1908 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1892 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1876 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1860 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; 
GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1844 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1828 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1812 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1796 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1780 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x700, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1764 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1748 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1732 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1716 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1700 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1684 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1668 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1652 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x680, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1636 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1620 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1604 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1588 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1572 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1556 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1540 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1524 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x600, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1508 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1492 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1476 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1460 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1444 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1428 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1412 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1396 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x580, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, vcc_lo, 0x500, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], 
v[23:26], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1380 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1364 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1348 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1332 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1316 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1300 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1284 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1268 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1252 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1236 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1220 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1204 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1188 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1172 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1156 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1140 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2032 +; 
GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1124 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1108 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1092 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1076 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1060 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1044 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1028 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1012 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:996 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:980 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:964 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:948 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:932 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:916 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:900 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:884 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off 
offset:868 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:852 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:836 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:820 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:804 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:788 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:772 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:756 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:740 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:724 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:708 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:692 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:676 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:660 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:644 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:628 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:612 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 
v[11:12], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:596 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:580 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:564 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:548 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:532 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:516 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:500 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:484 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:468 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:452 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:436 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:420 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:404 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:388 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:372 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:356 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off 
offset:340 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:324 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:308 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:292 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:276 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:260 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:244 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:228 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:212 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:196 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:180 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:164 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:148 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:132 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:116 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2032 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:100 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2016 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:84 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: 
global_store_dwordx4 v[19:20], v[0:3], off offset:2000 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:68 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1984 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:52 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1968 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:36 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1952 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:20 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1936 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:4 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1920 +; GFX10-FLATSCR-NEXT: s_endpgm entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -45,79 +10073,991 @@ entry: ret void } -; CHECK-LABEL: test_limited_sgpr -; GFX6: %bb.1: -; GFX6: s_mov_b64 exec, 0xff -; GFX6: buffer_store_dword [[SPILL_REG_0:v[0-9]+]] -; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_0]] -; GFX6: v_mov_b32_e32 [[OFFSET_REG0:v[0-9]+]], 0x[[OFFSET0:[0-9a-f]+]] -; GFX6: buffer_store_dword [[SPILL_REG_0]], [[OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6: buffer_load_dword [[SPILL_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, s - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG0:v[0-9]+]], 0x[[RELOAD_OFFSET0:[0-9a-f]+]] -; GFX6: buffer_store_dword [[RELOAD_REG_0:v[0-9]+]], off, -; GFX6: buffer_load_dword [[RELOAD_REG_0]], [[RELOAD_OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_0]] -; GFX6: buffer_load_dword [[RELOAD_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: buffer_store_dword [[SPILL_REG_1:v[0-9]+]] -; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_1]] -; GFX6: v_mov_b32_e32 [[OFFSET_REG1:v[0-9]+]], 0x[[OFFSET1:[0-9a-f]+]] -; GFX6: buffer_store_dword [[SPILL_REG_1]], [[OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6: buffer_load_dword [[SPILL_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, s - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG1:v[0-9]+]], 0x[[RELOAD_OFFSET1:[0-9a-f]+]] -; GFX6: buffer_store_dword [[RELOAD_REG_1:v[0-9]+]], off, -; GFX6: buffer_load_dword [[RELOAD_REG_1]], [[RELOAD_OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_1]] -; GFX6: buffer_load_dword [[RELOAD_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: buffer_store_dword [[SPILL_REG_2:v[0-9]+]] -; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_2]] -; GFX6: v_mov_b32_e32 [[OFFSET_REG2:v[0-9]+]], 0x[[OFFSET2:[0-9a-f]+]] -; GFX6: buffer_store_dword [[SPILL_REG_2]], [[OFFSET_REG2]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6: buffer_load_dword [[SPILL_REG_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, s - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: buffer_store_dword 
[[SPILL_REG_3:v[0-9]+]] -; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_3]] -; GFX6: v_mov_b32_e32 [[OFFSET_REG3:v[0-9]+]], 0x[[OFFSET3:[0-9a-f]+]] -; GFX6: buffer_store_dword [[SPILL_REG_3]], [[OFFSET_REG3]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6: buffer_load_dword [[SPILL_REG_3]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, s - - -; GFX6: s_mov_b64 exec, 0xff -; GFX6: buffer_store_dword [[SPILL_REG_4:v[0-9]+]] -; GFX6-COUNT-4: v_writelane_b32 [[SPILL_REG_4]] -; GFX6: v_mov_b32_e32 [[OFFSET_REG4:v[0-9]+]], 0x[[OFFSET4:[0-9a-f]+]] -; GFX6: buffer_store_dword [[SPILL_REG_4]], [[OFFSET_REG4]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen -; GFX6: buffer_load_dword [[SPILL_REG_4]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GFX6: s_mov_b64 exec, s - -; GFX6: NumSgprs: 48 -; GFX6: ScratchSize: 8608 - -; FLATSCR: s_movk_i32 [[SOFF1:s[0-9]+]], 0x -; GFX9-FLATSCR: s_waitcnt vmcnt(0) -; FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill -; FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x -; FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 { +; GFX6-LABEL: test_limited_sgpr: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 +; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 +; GFX6-NEXT: s_mov_b32 s42, -1 +; GFX6-NEXT: s_mov_b32 s43, 0xe8f000 +; GFX6-NEXT: s_add_u32 s40, s40, s3 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v5, -1, v0 +; GFX6-NEXT: v_mov_b32_e32 v6, 0 +; GFX6-NEXT: s_mov_b32 s6, 0 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 8, v5 +; GFX6-NEXT: v_mov_b32_e32 v8, v6 +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:240 +; GFX6-NEXT: s_addc_u32 s41, s41, 0 +; GFX6-NEXT: s_mov_b32 s8, 0x83800 +; GFX6-NEXT: s_mov_b32 s12, 0x82400 +; GFX6-NEXT: s_mov_b32 s24, 0x82000 +; GFX6-NEXT: s_mov_b32 s33, 0x81c00 +; GFX6-NEXT: s_mov_b32 s34, 0x81800 +; GFX6-NEXT: s_mov_b32 s36, 0x81400 +; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:224 +; GFX6-NEXT: s_mov_b32 s8, 0x83400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:208 +; GFX6-NEXT: s_mov_b32 s8, 0x83000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 
offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:192 +; GFX6-NEXT: s_mov_b32 s8, 0x82c00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:176 +; GFX6-NEXT: s_mov_b32 s8, 0x82800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s8 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s8 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s8 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s8 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:160 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s12 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s12 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s12 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:144 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s24 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s24 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s24 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s24 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:128 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s33 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s33 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s33 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s33 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:112 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s34 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s34 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s34 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s34 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:96 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword 
v0, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s36 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s36 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s36 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:80 +; GFX6-NEXT: s_mov_b32 s36, 0x81000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s36 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s36 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s36 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 offset:64 +; GFX6-NEXT: s_mov_b32 s36, 0x80800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s36 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s36 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s36 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[4:7], 0 addr64 +; GFX6-NEXT: buffer_load_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:16 +; GFX6-NEXT: s_mov_b32 s36, 0x80c00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s36 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v11, off, s[40:43], s36 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v12, off, s[40:43], s36 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_load_dwordx4 v[13:16], v[7:8], s[4:7], 0 addr64 offset:32 +; GFX6-NEXT: s_mov_b64 s[36:37], exec +; GFX6-NEXT: s_waitcnt expcnt(3) +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v9, s0, 0 +; GFX6-NEXT: v_writelane_b32 v9, s1, 1 +; GFX6-NEXT: v_writelane_b32 v9, s2, 2 +; GFX6-NEXT: v_writelane_b32 v9, s3, 3 +; GFX6-NEXT: s_mov_b32 s38, 0x80400 +; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s38 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[36:37] +; GFX6-NEXT: buffer_load_dwordx4 v[17:20], v[7:8], s[4:7], 0 addr64 offset:48 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4 +; GFX6-NEXT: v_mov_b32_e32 v7, 1 +; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[4:11] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_mov_b64 s[36:37], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v4, s4, 0 +; GFX6-NEXT: v_writelane_b32 v4, s5, 1 +; GFX6-NEXT: v_writelane_b32 v4, s6, 2 +; GFX6-NEXT: v_writelane_b32 v4, s7, 3 +; GFX6-NEXT: v_writelane_b32 v4, s8, 4 +; GFX6-NEXT: v_writelane_b32 v4, s9, 5 +; GFX6-NEXT: v_writelane_b32 v4, 
s10, 6 +; GFX6-NEXT: v_writelane_b32 v4, s11, 7 +; GFX6-NEXT: s_mov_b32 s38, 0x83c00 +; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s38 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[36:37] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[8:15] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[16:23] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[24:31] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[4:7] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s[2:3] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; def s33 +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_and_saveexec_b64 s[34:35], vcc +; GFX6-NEXT: s_cbranch_execz .LBB1_2 +; GFX6-NEXT: ; %bb.1: ; %bb0 +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s8, 0 +; GFX6-NEXT: v_writelane_b32 v7, s9, 1 +; GFX6-NEXT: v_writelane_b32 v7, s10, 2 +; GFX6-NEXT: v_writelane_b32 v7, s11, 3 +; GFX6-NEXT: v_writelane_b32 v7, s12, 4 +; GFX6-NEXT: v_writelane_b32 v7, s13, 5 +; GFX6-NEXT: v_writelane_b32 v7, s14, 6 +; GFX6-NEXT: v_writelane_b32 v7, s15, 7 +; GFX6-NEXT: s_mov_b32 s36, 0x84400 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: s_mov_b32 s36, 0x83c00 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s36 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s8, v7, 0 +; GFX6-NEXT: v_readlane_b32 s9, v7, 1 +; GFX6-NEXT: v_readlane_b32 s10, v7, 2 +; GFX6-NEXT: v_readlane_b32 s11, v7, 3 +; GFX6-NEXT: v_readlane_b32 s12, v7, 4 +; GFX6-NEXT: v_readlane_b32 s13, v7, 5 +; GFX6-NEXT: v_readlane_b32 s14, v7, 6 +; GFX6-NEXT: v_readlane_b32 s15, v7, 7 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s16, 0 +; GFX6-NEXT: v_writelane_b32 v7, s17, 1 +; GFX6-NEXT: v_writelane_b32 v7, s18, 2 +; GFX6-NEXT: v_writelane_b32 v7, s19, 3 +; GFX6-NEXT: v_writelane_b32 v7, s20, 4 +; GFX6-NEXT: v_writelane_b32 v7, s21, 5 +; GFX6-NEXT: v_writelane_b32 v7, s22, 6 +; GFX6-NEXT: v_writelane_b32 v7, s23, 7 +; GFX6-NEXT: s_mov_b32 s36, 0x84c00 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: s_mov_b32 s36, 0x84400 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s36 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: 
v_readlane_b32 s16, v7, 0 +; GFX6-NEXT: v_readlane_b32 s17, v7, 1 +; GFX6-NEXT: v_readlane_b32 s18, v7, 2 +; GFX6-NEXT: v_readlane_b32 s19, v7, 3 +; GFX6-NEXT: v_readlane_b32 s20, v7, 4 +; GFX6-NEXT: v_readlane_b32 s21, v7, 5 +; GFX6-NEXT: v_readlane_b32 s22, v7, 6 +; GFX6-NEXT: v_readlane_b32 s23, v7, 7 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s24, 0 +; GFX6-NEXT: v_writelane_b32 v7, s25, 1 +; GFX6-NEXT: v_writelane_b32 v7, s26, 2 +; GFX6-NEXT: v_writelane_b32 v7, s27, 3 +; GFX6-NEXT: v_writelane_b32 v7, s28, 4 +; GFX6-NEXT: v_writelane_b32 v7, s29, 5 +; GFX6-NEXT: v_writelane_b32 v7, s30, 6 +; GFX6-NEXT: v_writelane_b32 v7, s31, 7 +; GFX6-NEXT: s_mov_b32 s36, 0x85400 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: s_mov_b32 s36, 0x84c00 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s36 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s24, v7, 0 +; GFX6-NEXT: v_readlane_b32 s25, v7, 1 +; GFX6-NEXT: v_readlane_b32 s26, v7, 2 +; GFX6-NEXT: v_readlane_b32 s27, v7, 3 +; GFX6-NEXT: v_readlane_b32 s28, v7, 4 +; GFX6-NEXT: v_readlane_b32 s29, v7, 5 +; GFX6-NEXT: v_readlane_b32 s30, v7, 6 +; GFX6-NEXT: v_readlane_b32 s31, v7, 7 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s0, 0 +; GFX6-NEXT: v_writelane_b32 v7, s1, 1 +; GFX6-NEXT: v_writelane_b32 v7, s2, 2 +; GFX6-NEXT: v_writelane_b32 v7, s3, 3 +; GFX6-NEXT: s_mov_b32 s36, 0x85c00 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s4, 0 +; GFX6-NEXT: v_writelane_b32 v7, s5, 1 +; GFX6-NEXT: v_writelane_b32 v7, s6, 2 +; GFX6-NEXT: v_writelane_b32 v7, s7, 3 +; GFX6-NEXT: s_mov_b32 s36, 0x86000 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 3 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_writelane_b32 v7, s2, 0 +; GFX6-NEXT: v_writelane_b32 v7, s3, 1 +; GFX6-NEXT: s_mov_b32 s36, 0x86400 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s36 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: 
buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 0xff +; GFX6-NEXT: s_mov_b32 s36, 0x85400 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s36 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s0, v7, 0 +; GFX6-NEXT: v_readlane_b32 s1, v7, 1 +; GFX6-NEXT: v_readlane_b32 s2, v7, 2 +; GFX6-NEXT: v_readlane_b32 s3, v7, 3 +; GFX6-NEXT: v_readlane_b32 s4, v7, 4 +; GFX6-NEXT: v_readlane_b32 s5, v7, 5 +; GFX6-NEXT: v_readlane_b32 s6, v7, 6 +; GFX6-NEXT: v_readlane_b32 s7, v7, 7 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: v_mov_b32_e32 v9, 0x2180 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, v9, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s36, v7, 0 +; GFX6-NEXT: v_readlane_b32 s37, v7, 1 +; GFX6-NEXT: v_readlane_b32 s38, v7, 2 +; GFX6-NEXT: v_readlane_b32 s39, v7, 3 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: s_mov_b64 vcc, s[34:35] +; GFX6-NEXT: s_mov_b64 s[44:45], exec +; GFX6-NEXT: s_mov_b64 exec, 3 +; GFX6-NEXT: v_mov_b32_e32 v9, 0x2190 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, v9, s[40:43], 0 offen ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s34, v7, 0 +; GFX6-NEXT: v_readlane_b32 s35, v7, 1 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[44:45] +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35] +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_mov_b64 s[34:35], vcc +; GFX6-NEXT: s_mov_b64 s[4:5], exec +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: s_mov_b32 s6, 0x85c00 +; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s0, v7, 0 +; GFX6-NEXT: v_readlane_b32 s1, v7, 1 +; GFX6-NEXT: v_readlane_b32 s2, v7, 2 +; GFX6-NEXT: v_readlane_b32 s3, v7, 3 +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[4:5] +; GFX6-NEXT: s_mov_b32 s4, 0x83c00 +; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s4 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s4 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s4 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s4 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_mov_b32 s4, 0x84400 +; GFX6-NEXT: buffer_store_dword v13, off, s[40:43], s4 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v14, off, s[40:43], s4 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v15, off, s[40:43], s4 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v16, off, s[40:43], s4 offset:12 ; 4-byte 
Folded Spill +; GFX6-NEXT: s_mov_b32 s4, 0x84c00 +; GFX6-NEXT: buffer_store_dword v17, off, s[40:43], s4 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v18, off, s[40:43], s4 offset:4 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v19, off, s[40:43], s4 offset:8 ; 4-byte Folded Spill +; GFX6-NEXT: buffer_store_dword v20, off, s[40:43], s4 offset:12 ; 4-byte Folded Spill +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: s_mov_b32 s4, 0x84c00 +; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v20, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x84400 +; GFX6-NEXT: buffer_load_dword v13, off, s[40:43], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v14, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v15, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v16, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s4, 0x83c00 +; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: ;;#ASMSTART +; GFX6-NEXT: ;;#ASMEND +; GFX6-NEXT: .LBB1_2: ; %ret +; GFX6-NEXT: s_or_b64 exec, exec, s[34:35] +; GFX6-NEXT: s_mov_b64 s[8:9], exec +; GFX6-NEXT: s_mov_b64 exec, 15 +; GFX6-NEXT: s_mov_b32 s10, 0x80400 +; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s10 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: v_readlane_b32 s4, v10, 0 +; GFX6-NEXT: v_readlane_b32 s5, v10, 1 +; GFX6-NEXT: v_readlane_b32 s6, v10, 2 +; GFX6-NEXT: v_readlane_b32 s7, v10, 3 +; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], 0 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: s_mov_b64 exec, s[8:9] +; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] +; GFX6-NEXT: s_mov_b32 s6, 0x83800 +; GFX6-NEXT: v_lshl_b64 v[4:5], v[5:6], 8 +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x83400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:240 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte 
Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x83000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:224 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x82c00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:208 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x82800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:192 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x82400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:176 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x82000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:160 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x81c00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:144 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x81800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:128 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload 
+; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x81400 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:112 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x81000 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:96 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x80800 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:80 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_mov_b32 s6, 0x80c00 +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:64 +; GFX6-NEXT: buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:48 +; GFX6-NEXT: buffer_store_dwordx4 v[13:16], v[4:5], s[0:3], 0 addr64 offset:32 +; GFX6-NEXT: s_waitcnt expcnt(2) +; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s6 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 offset:4 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s6 offset:8 ; 4-byte Folded Reload +; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s6 offset:12 ; 4-byte Folded Reload +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:16 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 +; GFX6-NEXT: s_endpgm +; +; GFX9-FLATSCR-LABEL: test_limited_sgpr: +; GFX9-FLATSCR: ; %bb.0: ; %entry +; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 +; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v5, -1, v0 +; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 8, v5 +; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:240 +; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 +; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x2090 +; GFX9-FLATSCR-NEXT: s_movk_i32 s33, 0x2080 +; GFX9-FLATSCR-NEXT: s_movk_i32 s34, 0x2060 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2050 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 1 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v0, s[38:39] offset:224 +; 
GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:208 +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v0, s[38:39] offset:192 +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:176 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s8 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v0, s[38:39] offset:160 +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s33 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:128 +; GFX9-FLATSCR-NEXT: s_movk_i32 s33, 0x20c0 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s33 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:112 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s34 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:96 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:80 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2040 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:64 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2030 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:48 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2020 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:32 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2070 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:16 +; GFX9-FLATSCR-NEXT: s_movk_i32 s44, 0x2010 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s44 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v0, s[38:39] +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 16 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: v_lshl_add_u32 v4, v0, 13, v4 +; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-FLATSCR-NEXT: scratch_store_dword v4, v7, off +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[0:7] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[8:15] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[16:23] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[24:31] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[40:43] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s[38:39] +; GFX9-FLATSCR-NEXT: 
;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; def s33 +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[34:35], vcc +; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 +; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb0 +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[38:39] +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20f0 +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 +; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20f0 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: ;;#ASMSTART +; GFX9-FLATSCR-NEXT: ;;#ASMEND +; GFX9-FLATSCR-NEXT: .LBB1_2: ; %ret +; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[34:35] +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: v_lshlrev_b64 v[4:5], 8, v[5:6] +; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s37 +; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v4, vcc, s36, v4 +; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:240 +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[8:11], off offset:224 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2090 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:208 +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:192 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2080 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:176 +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:160 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20c0 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2060 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: 
s_movk_i32 s0, 0x2050 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(2) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:144 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(2) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:128 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2040 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(3) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:112 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:96 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2030 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:80 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2020 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:64 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2070 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:48 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 +; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload +; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[0:3], off +; GFX9-FLATSCR-NEXT: s_endpgm +; +; GFX10-FLATSCR-LABEL: test_limited_sgpr: +; GFX10-FLATSCR: ; %bb.0: ; %entry +; GFX10-FLATSCR-NEXT: s_add_u32 s2, s2, s5 +; GFX10-FLATSCR-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 +; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 1 +; GFX10-FLATSCR-NEXT: s_mov_b32 s33, exec_lo +; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v5, -1, v0 +; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 8, v5 +; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-NEXT: s_clause 0xf +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[64:67], v0, s[38:39] offset:240 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[60:63], v0, s[38:39] offset:224 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[56:59], v0, s[38:39] offset:208 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[52:55], v0, s[38:39] offset:192 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[48:51], v0, s[38:39] offset:176 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[44:47], v0, s[38:39] offset:160 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[40:43], v0, s[38:39] offset:144 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[36:39], v0, s[38:39] offset:128 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[32:35], v0, s[38:39] offset:112 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[28:31], v0, s[38:39] offset:96 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[24:27], v0, s[38:39] offset:80 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v0, s[38:39] offset:64 +; GFX10-FLATSCR-NEXT: 
global_load_dwordx4 v[16:19], v0, s[38:39] offset:48 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v0, s[38:39] offset:32 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v0, s[38:39] offset:16 +; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v0, s[38:39] +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: v_lshl_add_u32 v4, v0, 13, 16 +; GFX10-FLATSCR-NEXT: scratch_store_dword v4, v7, off +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[0:7] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[8:15] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[16:23] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[24:31] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[40:43] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s[34:35] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; def s38 +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: v_cmpx_eq_u32_e32 0, v0 +; GFX10-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 +; GFX10-FLATSCR-NEXT: ; %bb.1: ; %bb0 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[34:35] +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v88, v59 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v92, v63 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v87, v58 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v86, v57 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v85, v56 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v91, v62 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v90, v61 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v89, v60 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v35 +; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[64:67], s0 ; 16-byte Folded Spill +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v68, v39 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v34 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v33 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v32 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v67, v38 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v66, v37 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v65, v36 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v11 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v72, v43 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v76, v47 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v80, v51 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v84, v55 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v8 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v71, v42 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v70, v41 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v69, v40 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v15 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v75, v46 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v74, v45 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v73, v44 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v19 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v79, v50 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v78, v49 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v77, v48 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v23 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v83, v54 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v82, v53 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v81, v52 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v27 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v31 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v10 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v9 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v12 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v16 +; 
GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v20 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v24 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v28 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v14 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v13 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v18 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v17 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v22 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v21 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v26 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v25 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v30 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v29 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, v33 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v53 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v49 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v45 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v41 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, v37 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v34 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v35 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, v36 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v57 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v54 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v55 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v56 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v50 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v51 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, v52 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v46 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v47 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v48 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, v42 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v43 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v44 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v38 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v39 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v40 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v58 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v59 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v68 +; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[64:67], off, s0 ; 16-byte Folded Reload +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v89 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v85 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v81 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v77 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v73 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v69 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v61, v90 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v62, v91 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v63, v92 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v86 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v87 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v88 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v82 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v83 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v84 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v78 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v79 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v80 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v74 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v75 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v76 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v70 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v71 +; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v72 +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; 
GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: ;;#ASMSTART +; GFX10-FLATSCR-NEXT: ;;#ASMEND +; GFX10-FLATSCR-NEXT: .LBB1_2: ; %ret +; GFX10-FLATSCR-NEXT: s_or_b32 exec_lo, exec_lo, s33 +; GFX10-FLATSCR-NEXT: v_lshlrev_b64 v[4:5], 8, v[5:6] +; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, vcc_lo, s36, v4 +; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s37, v5, vcc_lo +; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[64:67], off offset:240 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[60:63], off offset:224 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[56:59], off offset:208 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[52:55], off offset:192 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[48:51], off offset:176 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[44:47], off offset:160 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[40:43], off offset:144 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[36:39], off offset:128 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[32:35], off offset:112 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[28:31], off offset:96 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[24:27], off offset:80 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:64 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:48 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:32 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[8:11], off offset:16 +; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[0:3], off +; GFX10-FLATSCR-NEXT: s_endpgm entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -141,15 +11081,14 @@ entry: %sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () %sgpr4 = call <4 x i32> asm sideeffect "; def $0", "=s" () %sgpr5 = call <2 x i32> asm sideeffect "; def $0", "=s" () - %sgpr6 = call <2 x i32> asm sideeffect "; def $0", "=s" () - %sgpr7 = call i32 asm sideeffect "; def $0", "=s" () + %sgpr6 = call i32 asm sideeffect "; def $0", "=s" () %cmp = icmp eq i32 %x, 0 br i1 %cmp, label %bb0, label %ret bb0: ; create SGPR pressure - call void asm sideeffect "; use $0,$1,$2,$3,$4,$5,$6", "s,s,s,s,s,s,s,s"(<8 x i32> %sgpr0, <8 x i32> %sgpr1, <8 x i32> %sgpr2, <8 x i32> %sgpr3, <4 x i32> %sgpr4, <2 x i32> %sgpr5, <2 x i32> %sgpr6, i32 %sgpr7) + call void asm sideeffect "; use $0,$1,$2,$3,$4,$5", "s,s,s,s,s,s,s"(<8 x i32> %sgpr0, <8 x i32> %sgpr1, <8 x i32> %sgpr2, <8 x i32> %sgpr3, <4 x i32> %sgpr4, <2 x i32> %sgpr5, i32 %sgpr6) ; mark most VGPR registers as used to increase register pressure call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () @@ -173,3 +11112,6 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 attributes #0 = { "amdgpu-waves-per-eu"="10,10" } attributes #1 = { nounwind readnone } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} +; FLATSCR: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll index 91d2ec82c81e7..333d33dd76c1a 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; Spill an SGPR to scratch without having spare SGPRs available to save exec +; The test was originally written to spill an SGPR to scratch without having spare SGPRs +; available to save exec. This scenario no longer applies, as we reserve SGPR(s) +; upfront for saving exec. define amdgpu_kernel void @test() #1 { ; GFX10-LABEL: test: @@ -18,44 +20,13 @@ define amdgpu_kernel void @test() #1 { ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s[8:12] ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_not_b64 exec, exec -; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; GFX10-NEXT: v_writelane_b32 v0, s8, 0 -; GFX10-NEXT: v_writelane_b32 v0, s9, 1 -; GFX10-NEXT: v_writelane_b32 v0, s10, 2 -; GFX10-NEXT: v_writelane_b32 v0, s11, 3 -; GFX10-NEXT: v_writelane_b32 v0, s12, 4 -; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_not_b64 exec, exec -; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_not_b64 exec, exec -; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_not_b64 exec, exec ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s[0:7] ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_mov_b64 s[6:7], exec -; GFX10-NEXT: s_mov_b64 exec, 31 -; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_readlane_b32 s0, v0, 0 -; GFX10-NEXT: v_readlane_b32 s1, v0, 1 -; GFX10-NEXT: v_readlane_b32 s2, v0, 2 -; GFX10-NEXT: v_readlane_b32 s3, v0, 3 -; GFX10-NEXT: v_readlane_b32 s4, v0, 4 -; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b64 exec, s[6:7] ; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use s[0:4] +; GFX10-NEXT: ; use s[8:12] ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir new file mode 100644 index 0000000000000..61a893796fb69 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -0,0 +1,387 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=si-lower-sgpr-spills -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# A simple SGPR spill. The implicit def for the lane VGPR should be inserted just before the spill instruction.
+--- +name: sgpr32_spill +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + bb.0: + liveins: $sgpr10 + ; GCN-LABEL: name: sgpr32_spill + ; GCN: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]] + ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_]], 0 + ; GCN-NEXT: KILL [[V_WRITELANE_B32_]] + ; GCN-NEXT: SI_RETURN + S_NOP 0 + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + SI_RETURN +... + +# An additional virtual lane register is needed as the lanes of the current register are fully occupied while spilling a wide SGPR tuple. +# There must be two implicit defs for the two lane VGPRs. + +--- +name: sgpr_spill_lane_crossover +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } + - { id: 1, type: spill-slot, size: 128, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + bb.0: + liveins: $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-LABEL: name: sgpr_spill_lane_crossover + ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr0, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr64, 0, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc0, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x00 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr65, 1, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc1, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x04 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr66, 2, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc2, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x08 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr67, 3, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc3, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x0c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr68, 4, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc4, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x10 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr69, 5, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10,
0xc5, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x14 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr70, 6, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc6, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x18 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr71, 7, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc7, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x1c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr72, 8, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc8, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x20 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr73, 9, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xc9, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x24 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr74, 10, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xca, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x28 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr75, 11, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xcb, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x2c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr76, 12, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xcc, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x30 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr77, 13, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xcd, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x34 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr78, 14, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xce, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x38 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr79, 15, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xcf, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x3c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr80, 16, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd0, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x40 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr81, 17, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd1, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x44 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr82, 18, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd2, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x48 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr83, 19, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd3, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x4c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr84, 20, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd4, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x50 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr85, 21, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd5, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x54 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr86, 22, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd6, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x58 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr87, 23, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd7, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x5c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr88, 24, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd8, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x60 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr89, 25, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xd9, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x64 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr90, 26, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xda, 0x08, 0x05, 0x90, 0x80, 
0x14, 0xe4, 0x68 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr91, 27, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xdb, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x6c + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr92, 28, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xdc, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x70 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr93, 29, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xdd, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x74 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 30, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xde, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x78 + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr95, 31, $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0xdf, 0x08, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x7c + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]] + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr64, 1, [[V_WRITELANE_B32_1]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr65, 2, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr66, 3, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr67, 4, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr68, 5, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr69, 6, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr70, 7, [[V_WRITELANE_B32_1]], implicit 
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr71, 8, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr72, 9, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr73, 10, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr74, 11, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr75, 12, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr76, 13, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr77, 14, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr78, 15, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr79, 16, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr80, 17, [[V_WRITELANE_B32_1]], implicit 
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr81, 18, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr82, 19, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr83, 20, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr84, 21, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr85, 22, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr86, 23, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr87, 24, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr88, 25, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr89, 26, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr90, 27, [[V_WRITELANE_B32_1]], implicit 
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr91, 28, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr92, 29, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr93, 30, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr94, 31, [[V_WRITELANE_B32_1]], implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr95, 32, [[V_WRITELANE_B32_1]], implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $sgpr64 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN-NEXT: $sgpr65 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2 + ; GCN-NEXT: $sgpr66 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3 + ; GCN-NEXT: $sgpr67 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4 + ; GCN-NEXT: $sgpr68 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5 + ; GCN-NEXT: $sgpr69 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6 + ; GCN-NEXT: $sgpr70 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7 + ; GCN-NEXT: $sgpr71 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8 + ; GCN-NEXT: $sgpr72 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9 + ; GCN-NEXT: $sgpr73 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10 + ; GCN-NEXT: $sgpr74 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 11 + ; GCN-NEXT: $sgpr75 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 12 + ; GCN-NEXT: $sgpr76 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 13 + ; GCN-NEXT: $sgpr77 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 14 + ; GCN-NEXT: $sgpr78 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 15 + ; GCN-NEXT: $sgpr79 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 16 + ; GCN-NEXT: $sgpr80 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 17 + ; GCN-NEXT: $sgpr81 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 18 + ; GCN-NEXT: $sgpr82 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 19 + ; GCN-NEXT: $sgpr83 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 20 + ; GCN-NEXT: $sgpr84 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 
21 + ; GCN-NEXT: $sgpr85 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 22 + ; GCN-NEXT: $sgpr86 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 23 + ; GCN-NEXT: $sgpr87 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 24 + ; GCN-NEXT: $sgpr88 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 25 + ; GCN-NEXT: $sgpr89 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 26 + ; GCN-NEXT: $sgpr90 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 27 + ; GCN-NEXT: $sgpr91 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 28 + ; GCN-NEXT: $sgpr92 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 29 + ; GCN-NEXT: $sgpr93 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 30 + ; GCN-NEXT: $sgpr94 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 31 + ; GCN-NEXT: $sgpr95 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 32 + ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0 + ; GCN-NEXT: $sgpr95 = V_READLANE_B32 $vgpr0, 31 + ; GCN-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0 + ; GCN-NEXT: KILL [[V_WRITELANE_B32_1]] + ; GCN-NEXT: SI_RETURN implicit $sgpr10 + S_NOP 0 + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_NOP 0 + renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + SI_RETURN implicit $sgpr10 +... + +# The implicit def for the lane VGPR should be inserted at the common dominator block (the entry block here). 
+ +--- +name: lane_vgpr_implicit_def_at_common_dominator_block +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + ; GCN-LABEL: name: lane_vgpr_implicit_def_at_common_dominator_block + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr10, $sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = S_MOV_B32 10 + ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]] + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = S_MOV_B32 20 + ; GCN-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_1]] + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0 + ; GCN-NEXT: KILL [[V_WRITELANE_B32_1]] + ; GCN-NEXT: SI_RETURN implicit $sgpr10 + bb.0: + liveins: $sgpr10, $sgpr11 + S_NOP 0 + S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.2, implicit killed $scc + bb.1: + liveins: $sgpr10 + $sgpr10 = S_MOV_B32 10 + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_BRANCH %bb.3 + bb.2: + liveins: $sgpr10 + $sgpr10 = S_MOV_B32 20 + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_BRANCH %bb.3 + bb.3: + liveins: $sgpr10 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + SI_RETURN implicit $sgpr10 +... + +# The common dominator block is visited only at the end. The insertion point was initially identified as the +# terminator instruction in the dominator block, which later becomes the point where a spill gets inserted in the same block.
+ +--- +name: dominator_block_follows_the_successors_bbs +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true +body: | + ; GCN-LABEL: name: dominator_block_follows_the_successors_bbs + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr10, $sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[DEF]], 0 + ; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = V_READLANE_B32 [[DEF]], 0 + ; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr10, $sgpr11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr10 = S_MOV_B32 10 + ; GCN-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[V_WRITELANE_B32_]] + ; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: liveins: $sgpr10 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: KILL [[V_WRITELANE_B32_]] + ; GCN-NEXT: SI_RETURN implicit $sgpr10 + bb.0: + liveins: $sgpr10, $sgpr11 + S_NOP 0 + S_BRANCH %bb.3 + bb.1: + liveins: $sgpr10 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc + S_BRANCH %bb.2 + bb.2: + liveins: $sgpr10 + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc + S_BRANCH %bb.3 + bb.3: + liveins: $sgpr10, $sgpr11 + $sgpr10 = S_MOV_B32 10 + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 + S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.2, implicit killed $scc + S_BRANCH %bb.1 + bb.4: + liveins: $sgpr10 + S_NOP 0 + SI_RETURN implicit $sgpr10 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir index 2dfd7d87a1841..059b2ebbf94e8 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir @@ -16,7 +16,7 @@ body: | ; GCN-LABEL: name: partial_spill_v128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $agpr28_agpr29, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -47,7 +47,7 @@ body: | ; GCN-LABEL: name: partial_spill_v128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $agpr28_agpr29, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -57,7 +57,7 @@ body: | ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -80,7 +80,7 @@ body: | ; GCN-LABEL: name: partial_spill_v128_3_of_4 ; GCN: liveins: $agpr28, $agpr29, $agpr30, $agpr31, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -91,8 +91,8 @@ body: | ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -115,7 +115,7 @@ body: | ; GCN-LABEL: name: full_spill_v128 ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -130,9 +130,9 @@ body: | ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, 
$sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -154,7 +154,7 @@ body: | ; GCN-LABEL: name: partial_spill_a128_1_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr52_vgpr53 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -185,7 +185,7 @@ body: | ; GCN-LABEL: name: partial_spill_a128_2_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr52_vgpr53 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -195,7 +195,7 @@ body: | ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -218,7 +218,7 @@ body: | ; GCN-LABEL: name: partial_spill_a128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, 
$agpr0_agpr1_agpr2_agpr3, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 @@ -229,8 +229,8 @@ body: | ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -253,7 +253,7 @@ body: | ; GCN-LABEL: name: full_spill_a128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -268,9 +268,9 @@ body: | ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll index fd602280a5e82..b01f89cdfeedb 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -10,15 +10,15 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 { ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4) ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[PRED_COPY:%[0-9]+]]:areg_128 = PRED_COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[PRED_COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def undef %22.sub0 - ; GCN-NEXT: undef %24.sub0:av_64 = COPY %22.sub0 + ; GCN-NEXT: undef %24.sub0:av_64 = PRED_COPY %22.sub0 ; GCN-NEXT: SI_SPILL_AV64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] - ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: [[PRED_COPY1:%[0-9]+]]:vreg_128 = PRED_COPY [[V_MFMA_I32_4X4X4I8_e64_]] + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[PRED_COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0 + ; GCN-NEXT: undef %23.sub0:vreg_64 = PRED_COPY [[SI_SPILL_AV64_RESTORE]].sub0 ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3211273 /* reguse:VReg_64 */, %23 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll new file mode 100644 index 
0000000000000..a03e75773aebf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -verify-machineinstrs -o - %s | FileCheck %s + +; Regression test for `processFunctionBeforeFrameFinalized`: +; Check that it correctly updates RegisterScavenger so we +; don't end up with bad machine code due to using undefined +; physical registers. + +define void @test() { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %bb.0 +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr0 +; CHECK-NEXT: .LBB0_1: ; %bb.1 +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_3 +; CHECK-NEXT: ; %bb.2: ; %bb.2 +; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_3: ; %bb.3 +; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; implicit-def: $sgpr4 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_readfirstlane_b32 s6, v0 +; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 s[4:5], -1 +; CHECK-NEXT: s_mov_b32 s7, 0 +; CHECK-NEXT: s_cmp_eq_u32 s6, s7 +; CHECK-NEXT: v_writelane_b32 v0, s4, 0 +; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: s_mov_b64 s[10:11], exec +; CHECK-NEXT: s_mov_b64 exec, -1 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 +; CHECK-NEXT: ; %bb.4: ; %bb.4 +; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: v_writelane_b32 v0, s4, 0 +; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: .LBB0_5: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: v_readlane_b32 s4, v0, 0 +; CHECK-NEXT: v_readlane_b32 s5, v0, 1 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; CHECK-NEXT: s_mov_b32 s4, 1 +; CHECK-NEXT: ; implicit-def: $sgpr5 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s4 +; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_1 +; CHECK-NEXT: ; %bb.6: ; %bb.5 +; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 +; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: s_mov_b64 exec, s[10:11] +; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb.0: + br label %bb.1 +bb.1: ; preds = %bb.4, %bb.0 + br i1 poison, label %bb.2, label %bb.3 +bb.2: ; preds = %bb.1 + br label %bb.3 +bb.3: ; preds = %bb.2, %bb.1 + %call = tail call i32 @llvm.amdgcn.readfirstlane(i32 poison) + %cmp = icmp eq i32 %call, 0 + 
br i1 %cmp, label %bb.5, label %bb.4 +bb.4: ; preds = %bb.3 + br label %bb.1 +bb.5: ; preds = %bb.3 + ret void +} + +declare i32 @llvm.amdgcn.readfirstlane(i32) diff --git a/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll new file mode 100644 index 0000000000000..f216a4514edad --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +; Callee must preserve the VGPR modified by writelane even if it is marked Caller-saved. + +declare i32 @llvm.amdgcn.writelane(i32, i32, i32) + +define void @sgpr_spill_writelane() { +; GCN-LABEL: sgpr_spill_writelane: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: v_writelane_b32 v0, s35, 0 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s35, v0, 0 +; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "", "~{s35}"() + ret void +} + +; FIXME: The writelane intrinsic doesn't really overwrite any inactive lanes +; and hence there is no need to preserve the VGPR it modifies. +define void @device_writelane_intrinsic(i32 addrspace(1)* %out, i32 %src) { +; GCN-LABEL: device_writelane_intrinsic: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: v_mov_b32_e32 v3, 15 +; GCN-NEXT: v_readfirstlane_b32 s4, v2 +; GCN-NEXT: v_writelane_b32 v3, s4, 23 +; GCN-NEXT: global_store_dword v[0:1], v3, off +; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %writelane = call i32 @llvm.amdgcn.writelane(i32 %src, i32 23, i32 15) + store i32 %writelane, i32 addrspace(1)* %out, align 4 + ret void +} + +define amdgpu_kernel void @kernel_writelane_intrinsic(i32 addrspace(1)* %out, i32 %src0, i32 %src1) { +; GCN-LABEL: kernel_writelane_intrinsic: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v1, 45 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 m0, s3 +; GCN-NEXT: v_writelane_b32 v1, s2, m0 +; GCN-NEXT: global_store_dword v0, v1, s[0:1] +; GCN-NEXT: s_endpgm + %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 45) + store i32 %writelane, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir index df0c836b556e2..2514457dba7cb 100644 --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -32,32 +32,29 @@ body: | ; EXPANDED-LABEL: name: spill_restore_sgpr192 ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) - ; 
EXPANDED-NEXT: liveins: $vgpr0 ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: bb.1: ; EXPANDED-NEXT: successors: %bb.2(0x80000000) - ; EXPANDED-NEXT: liveins: $vgpr0 ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: S_NOP 1 ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: bb.2: - ; EXPANDED-NEXT: liveins: $vgpr0 - ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 - ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 - ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 - ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 - ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5 ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 bb.0: S_NOP 0, implicit-def %0:sgpr_192 diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir index 09f208246995c..a361ba0d64a86 100644 --- a/llvm/test/CodeGen/AMDGPU/spill224.mir +++ b/llvm/test/CodeGen/AMDGPU/spill224.mir @@ -30,34 +30,31 @@ body: | ; EXPANDED-LABEL: name: spill_restore_sgpr224 ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: 
%bb.1(0x80000000) - ; EXPANDED-NEXT: liveins: $vgpr0 ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: bb.1: ; EXPANDED-NEXT: successors: %bb.2(0x80000000) - ; EXPANDED-NEXT: liveins: $vgpr0 ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: S_NOP 1 ; EXPANDED-NEXT: {{ $}} ; EXPANDED-NEXT: bb.2: - ; EXPANDED-NEXT: liveins: $vgpr0 - ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1 - ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2 - ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3 - ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4 - ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5 - ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1 + ; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2 + ; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3 + ; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4 + ; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 
[[V_WRITELANE_B32_1]], 5 + ; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6 ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 bb.0: S_NOP 0, implicit-def %0:sgpr_224 diff --git a/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir new file mode 100644 index 0000000000000..4d4b8a07dda85 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s + +# Check that liverange splitting does not create copies that overlap within a bundle. +# By overlap, we mean that they write to the same subregisters. +# e.g. the following bundle is desirable +# %0.sub1_sub2 = COPY ... { +# %0.sub3 = COPY ... +# } +# but the following bundle isn't desirable as the overlap of the copies can make +# virtregrewriter fail due to cycles in the copy bundle. +# %0.sub1_sub2 = COPY ... { +# %0.sub2_sub3 = COPY ... +# } +--- +name: split_liverange_copy_overlap_31 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: split_liverange_copy_overlap_31 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_1024_align2 = COPY [[DEF1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %3:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24:av_1024_align2 = PRED_COPY [[COPY]].sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24 + ; CHECK-NEXT: internal %6.sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16:av_1024_align2 = PRED_COPY [[COPY]].sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16 + ; CHECK-NEXT: } + ; CHECK-NEXT: %6.sub0:av_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit %6.sub0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit %6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:av_1024_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: undef 
%4.sub0:vreg_1024_align2 = COPY [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit %4 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024_align2 = IMPLICIT_DEF + %2:vreg_1024_align2 = COPY %1 + + bb.1: + %5:vreg_64 = IMPLICIT_DEF + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.2: + %2.sub0:vreg_1024_align2 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0 + + bb.3: + S_NOP 0, implicit %2 + + bb.4: + %2:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.5: + undef %4.sub0:vreg_1024_align2 = COPY %0 + S_NOP 0, implicit %4 +... +--- +name: split_liverange_copy_overlap_30 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: split_liverange_copy_overlap_30 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_1024 = COPY [[DEF1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %3:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: S_NOP 0, implicit [[DEF1]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = PRED_COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK-NEXT: internal %6.sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24:av_1024 = PRED_COPY [[COPY]].sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24 + ; CHECK-NEXT: internal %6.sub25_sub26_sub27_sub28_sub29_sub30:av_1024 = PRED_COPY [[COPY]].sub25_sub26_sub27_sub28_sub29_sub30 + ; CHECK-NEXT: } + ; CHECK-NEXT: %6.sub0:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: %6.sub31:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit %6.sub0, implicit %6.sub31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit %6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:av_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: undef %4.sub0:vreg_1024 = COPY [[DEF]] + ; CHECK-NEXT: S_NOP 0, implicit %4 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = COPY %1 + + bb.1: + %5:vreg_64 = IMPLICIT_DEF + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.2: + %2.sub0:vreg_1024 = IMPLICIT_DEF + %2.sub31:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0, implicit %2.sub31 + + bb.3: + S_NOP 0, implicit %2 + + bb.4: + %2:vreg_1024 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.5: + undef %4.sub0:vreg_1024 = COPY %0 + S_NOP 0, implicit %4 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 9085a2f8101f8..7f8aef5add88f 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -264,22 +264,22 @@ body: | ; RA-NEXT: [[DEF2]].sub8:sgpr_512 = S_MOV_B32 -1 ; RA-NEXT: [[DEF2]].sub13:sgpr_512 = S_MOV_B32 -1 ; RA-NEXT: [[DEF2]].sub14:sgpr_512 = S_MOV_B32 -1 - ; RA-NEXT: undef %15.sub4_sub5:sgpr_512 = COPY [[DEF2]].sub4_sub5 { - ; RA-NEXT: internal %15.sub10_sub11:sgpr_512 = COPY [[DEF2]].sub10_sub11 - ; RA-NEXT: internal %15.sub7:sgpr_512 = COPY [[DEF2]].sub7 - ; RA-NEXT: internal %15.sub8:sgpr_512 = COPY [[DEF2]].sub8 - ; RA-NEXT: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13 - ; RA-NEXT: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14 + ; RA-NEXT: undef %15.sub4_sub5:sgpr_512 = PRED_COPY [[DEF2]].sub4_sub5 { + ; RA-NEXT: internal %15.sub10_sub11:sgpr_512 = PRED_COPY [[DEF2]].sub10_sub11 + ; RA-NEXT: internal %15.sub7:sgpr_512 = PRED_COPY [[DEF2]].sub7 + ; RA-NEXT: internal %15.sub8:sgpr_512 = PRED_COPY [[DEF2]].sub8 + ; RA-NEXT: internal %15.sub13:sgpr_512 = PRED_COPY [[DEF2]].sub13 + ; RA-NEXT: internal %15.sub14:sgpr_512 = PRED_COPY [[DEF2]].sub14 ; RA-NEXT: } ; RA-NEXT: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { - ; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 - ; RA-NEXT: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 - ; RA-NEXT: internal %14.sub8:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub8 - ; RA-NEXT: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 - ; RA-NEXT: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 + ; RA-NEXT: undef %14.sub4_sub5:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { + ; RA-NEXT: internal %14.sub10_sub11:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 + ; RA-NEXT: internal %14.sub7:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub7 + ; RA-NEXT: internal %14.sub8:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub8 + ; RA-NEXT: internal %14.sub13:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub13 + ; RA-NEXT: internal %14.sub14:sgpr_512 = PRED_COPY [[SI_SPILL_S512_RESTORE]].sub14 ; RA-NEXT: } ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load (s32)) ; RA-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load (s32)) @@ -303,12 +303,12 @@ body: | ; VR-NEXT: SI_SPILL_S512_SAVE killed renamable 
$sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; VR-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 ; VR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; VR-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17 - ; VR-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr19 - ; VR-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 - ; VR-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr20 - ; VR-NEXT: renamable $sgpr21 = COPY killed renamable $sgpr25 - ; VR-NEXT: renamable $sgpr22 = COPY killed renamable $sgpr26 + ; VR-NEXT: renamable $sgpr12_sgpr13 = PRED_COPY killed renamable $sgpr16_sgpr17 + ; VR-NEXT: renamable $sgpr15 = PRED_COPY killed renamable $sgpr19 + ; VR-NEXT: renamable $sgpr18_sgpr19 = PRED_COPY killed renamable $sgpr22_sgpr23 + ; VR-NEXT: renamable $sgpr16 = PRED_COPY killed renamable $sgpr20 + ; VR-NEXT: renamable $sgpr21 = PRED_COPY killed renamable $sgpr25 + ; VR-NEXT: renamable $sgpr22 = PRED_COPY killed renamable $sgpr26 ; VR-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF ; VR-NEXT: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load (s32)) ; VR-NEXT: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load (s32)) diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir index b9fa585409df5..eadc0e8a8d245 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -14,16 +14,16 @@ body: | ; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[PRED_COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; CHECK-NEXT: undef %2.sub3:sgpr_128 = S_MOV_B32 61440 ; CHECK-NEXT: %2.sub2:sgpr_128 = S_MOV_B32 -1 - ; CHECK-NEXT: %2.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 - ; CHECK-NEXT: %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 - ; CHECK-NEXT: undef %3.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 - ; CHECK-NEXT: %3.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub3 - ; CHECK-NEXT: %3.sub2:sgpr_128 = COPY %2.sub2 - ; 
CHECK-NEXT: %3.sub3:sgpr_128 = COPY %2.sub3 + ; CHECK-NEXT: %2.sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub0 + ; CHECK-NEXT: %2.sub1:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub1 + ; CHECK-NEXT: undef %3.sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK-NEXT: %3.sub1:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX4_IMM]].sub3 + ; CHECK-NEXT: %3.sub2:sgpr_128 = PRED_COPY %2.sub2 + ; CHECK-NEXT: %3.sub3:sgpr_128 = PRED_COPY %2.sub3 ; CHECK-NEXT: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) @@ -62,140 +62,140 @@ body: | ; CHECK-NEXT: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK-NEXT: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; CHECK-NEXT: undef %48.sub2:vreg_128 = COPY %47.sub2 + ; CHECK-NEXT: undef %48.sub2:vreg_128 = PRED_COPY %47.sub2 ; CHECK-NEXT: %48.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: undef %50.sub0:vreg_128 = COPY %48.sub0 { - ; CHECK-NEXT: internal %50.sub2:vreg_128 = COPY %48.sub2 + ; CHECK-NEXT: undef %50.sub0:vreg_128 = PRED_COPY %48.sub0 { + ; CHECK-NEXT: internal %50.sub2:vreg_128 = PRED_COPY %48.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %50, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %55.sub2:vreg_128 = COPY %54.sub2 + ; CHECK-NEXT: undef %55.sub2:vreg_128 = PRED_COPY %54.sub2 ; CHECK-NEXT: %55.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: undef %57.sub0:vreg_128 = COPY %55.sub0 { - ; CHECK-NEXT: internal %57.sub2:vreg_128 = COPY %55.sub2 + ; CHECK-NEXT: undef %57.sub0:vreg_128 = PRED_COPY %55.sub0 { + ; CHECK-NEXT: internal %57.sub2:vreg_128 = PRED_COPY %55.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %57, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %62.sub2:vreg_128 = COPY %61.sub2 + ; CHECK-NEXT: undef %62.sub2:vreg_128 = PRED_COPY %61.sub2 ; CHECK-NEXT: %62.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: undef %64.sub0:vreg_128 = COPY %62.sub0 { - ; CHECK-NEXT: internal %64.sub2:vreg_128 = COPY %62.sub2 + ; CHECK-NEXT: undef %64.sub0:vreg_128 = PRED_COPY %62.sub0 { + ; CHECK-NEXT: internal %64.sub2:vreg_128 = PRED_COPY %62.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %64, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %69.sub2:vreg_128 = COPY %68.sub2 + ; CHECK-NEXT: undef %69.sub2:vreg_128 = PRED_COPY %68.sub2 ; CHECK-NEXT: %69.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: undef %71.sub0:vreg_128 = COPY %69.sub0 { - ; CHECK-NEXT: internal %71.sub2:vreg_128 = COPY %69.sub2 + ; CHECK-NEXT: undef %71.sub0:vreg_128 = PRED_COPY %69.sub0 { + ; CHECK-NEXT: internal 
%71.sub2:vreg_128 = PRED_COPY %69.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %71, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %76.sub2:vreg_128 = COPY %75.sub2 + ; CHECK-NEXT: undef %76.sub2:vreg_128 = PRED_COPY %75.sub2 ; CHECK-NEXT: %76.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: undef %78.sub0:vreg_128 = COPY %76.sub0 { - ; CHECK-NEXT: internal %78.sub2:vreg_128 = COPY %76.sub2 + ; CHECK-NEXT: undef %78.sub0:vreg_128 = PRED_COPY %76.sub0 { + ; CHECK-NEXT: internal %78.sub2:vreg_128 = PRED_COPY %76.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %78, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %83.sub2:vreg_128 = COPY %82.sub2 + ; CHECK-NEXT: undef %83.sub2:vreg_128 = PRED_COPY %82.sub2 ; CHECK-NEXT: %83.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: undef %85.sub0:vreg_128 = COPY %83.sub0 { - ; CHECK-NEXT: internal %85.sub2:vreg_128 = COPY %83.sub2 + ; CHECK-NEXT: undef %85.sub0:vreg_128 = PRED_COPY %83.sub0 { + ; CHECK-NEXT: internal %85.sub2:vreg_128 = PRED_COPY %83.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %85, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %90.sub2:vreg_128 = COPY %89.sub2 + ; CHECK-NEXT: undef %90.sub2:vreg_128 = PRED_COPY %89.sub2 ; CHECK-NEXT: %90.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: undef %140.sub0:vreg_128 = COPY %90.sub0 { - ; CHECK-NEXT: internal %140.sub2:vreg_128 = COPY %90.sub2 + ; CHECK-NEXT: undef %140.sub0:vreg_128 = PRED_COPY %90.sub0 { + ; CHECK-NEXT: internal %140.sub2:vreg_128 = PRED_COPY %90.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %140, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %95.sub2:vreg_128 = COPY %94.sub2 + ; CHECK-NEXT: undef %95.sub2:vreg_128 = PRED_COPY %94.sub2 ; CHECK-NEXT: %95.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: undef %107.sub0:vreg_128 = COPY %95.sub0 { - ; CHECK-NEXT: internal %107.sub2:vreg_128 = COPY %95.sub2 + ; CHECK-NEXT: undef %107.sub0:vreg_128 = PRED_COPY %95.sub0 { + ; CHECK-NEXT: internal %107.sub2:vreg_128 = PRED_COPY %95.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %107, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %100.sub2:vreg_128 = COPY %99.sub2 + ; CHECK-NEXT: undef %100.sub2:vreg_128 = PRED_COPY %99.sub2 ; CHECK-NEXT: %100.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef %101.sub0:vreg_128 = COPY %100.sub0 { - ; CHECK-NEXT: internal %101.sub2:vreg_128 = COPY %100.sub2 + ; CHECK-NEXT: undef %101.sub0:vreg_128 = PRED_COPY %100.sub0 { + ; CHECK-NEXT: internal %101.sub2:vreg_128 = PRED_COPY %100.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %105.sub2:vreg_128 = COPY %104.sub2 + ; CHECK-NEXT: undef %105.sub2:vreg_128 = PRED_COPY %104.sub2 ; CHECK-NEXT: %105.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef %106.sub0:vreg_128 = COPY %105.sub0 { - ; CHECK-NEXT: internal %106.sub2:vreg_128 = COPY %105.sub2 + ; 
CHECK-NEXT: undef %106.sub0:vreg_128 = PRED_COPY %105.sub0 { + ; CHECK-NEXT: internal %106.sub2:vreg_128 = PRED_COPY %105.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %139.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: undef %158.sub0:vreg_128 = COPY %139.sub0 { - ; CHECK-NEXT: internal %158.sub2:vreg_128 = COPY %139.sub2 + ; CHECK-NEXT: undef %158.sub0:vreg_128 = PRED_COPY %139.sub0 { + ; CHECK-NEXT: internal %158.sub2:vreg_128 = PRED_COPY %139.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %158, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %186.sub2:vreg_128 = COPY %185.sub2 + ; CHECK-NEXT: undef %186.sub2:vreg_128 = PRED_COPY %185.sub2 ; CHECK-NEXT: %186.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef %188.sub0:vreg_128 = COPY %186.sub0 { - ; CHECK-NEXT: internal %188.sub2:vreg_128 = COPY %186.sub2 + ; CHECK-NEXT: undef %188.sub0:vreg_128 = PRED_COPY %186.sub0 { + ; CHECK-NEXT: internal %188.sub2:vreg_128 = PRED_COPY %186.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %188, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %167.sub2:vreg_128 = COPY %166.sub2 + ; CHECK-NEXT: undef %167.sub2:vreg_128 = PRED_COPY %166.sub2 ; CHECK-NEXT: %167.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: undef %169.sub0:vreg_128 = COPY %167.sub0 { - ; CHECK-NEXT: internal %169.sub2:vreg_128 = COPY %167.sub2 + ; CHECK-NEXT: undef %169.sub0:vreg_128 = PRED_COPY %167.sub0 { + ; CHECK-NEXT: internal %169.sub2:vreg_128 = PRED_COPY %167.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %169, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %114.sub2:vreg_128 = COPY %113.sub2 + ; CHECK-NEXT: undef %114.sub2:vreg_128 = PRED_COPY %113.sub2 ; CHECK-NEXT: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: undef %115.sub0:vreg_128 = COPY %114.sub0 { - ; CHECK-NEXT: internal %115.sub2:vreg_128 = COPY %114.sub2 + ; CHECK-NEXT: undef %115.sub0:vreg_128 = PRED_COPY %114.sub0 { + ; CHECK-NEXT: internal %115.sub2:vreg_128 = PRED_COPY %114.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %119.sub2:vreg_128 = COPY %118.sub2 + ; CHECK-NEXT: undef %119.sub2:vreg_128 = PRED_COPY %118.sub2 ; CHECK-NEXT: %119.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: undef %181.sub0:vreg_128 = COPY %119.sub0 { - ; CHECK-NEXT: internal %181.sub2:vreg_128 = COPY %119.sub2 + ; CHECK-NEXT: undef %181.sub0:vreg_128 = PRED_COPY %119.sub0 { + ; CHECK-NEXT: internal %181.sub2:vreg_128 = PRED_COPY %119.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: SI_SPILL_V128_SAVE %181, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %124.sub2:vreg_128 = COPY %123.sub2 + ; CHECK-NEXT: undef %124.sub2:vreg_128 = PRED_COPY %123.sub2 ; CHECK-NEXT: %124.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK-NEXT: undef %125.sub0:vreg_128 = COPY %124.sub0 { - ; CHECK-NEXT: internal %125.sub2:vreg_128 = COPY %124.sub2 + ; CHECK-NEXT: undef %125.sub0:vreg_128 = PRED_COPY %124.sub0 { + ; CHECK-NEXT: internal %125.sub2:vreg_128 = 
PRED_COPY %124.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %129.sub2:vreg_128 = COPY %128.sub2 + ; CHECK-NEXT: undef %129.sub2:vreg_128 = PRED_COPY %128.sub2 ; CHECK-NEXT: %129.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %130.sub0:vreg_128 = COPY %129.sub0 { - ; CHECK-NEXT: internal %130.sub2:vreg_128 = COPY %129.sub2 + ; CHECK-NEXT: undef %130.sub0:vreg_128 = PRED_COPY %129.sub0 { + ; CHECK-NEXT: internal %130.sub2:vreg_128 = PRED_COPY %129.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %134.sub2:vreg_128 = COPY %133.sub2 + ; CHECK-NEXT: undef %134.sub2:vreg_128 = PRED_COPY %133.sub2 ; CHECK-NEXT: %134.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef %135.sub0:vreg_128 = COPY %134.sub0 { - ; CHECK-NEXT: internal %135.sub2:vreg_128 = COPY %134.sub2 + ; CHECK-NEXT: undef %135.sub0:vreg_128 = PRED_COPY %134.sub0 { + ; CHECK-NEXT: internal %135.sub2:vreg_128 = PRED_COPY %134.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %145.sub2:vreg_128 = COPY %144.sub2 + ; CHECK-NEXT: undef %145.sub2:vreg_128 = PRED_COPY %144.sub2 ; CHECK-NEXT: %145.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef %146.sub0:vreg_128 = COPY %145.sub0 { - ; CHECK-NEXT: internal %146.sub2:vreg_128 = COPY %145.sub2 + ; CHECK-NEXT: undef %146.sub0:vreg_128 = PRED_COPY %145.sub0 { + ; CHECK-NEXT: internal %146.sub2:vreg_128 = PRED_COPY %145.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: undef %150.sub2:vreg_128 = COPY %149.sub2 + ; CHECK-NEXT: undef %150.sub2:vreg_128 = PRED_COPY %149.sub2 ; CHECK-NEXT: %150.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: undef %151.sub0:vreg_128 = COPY %150.sub0 { - ; CHECK-NEXT: internal %151.sub2:vreg_128 = COPY %150.sub2 + ; CHECK-NEXT: undef %151.sub0:vreg_128 = PRED_COPY %150.sub0 { + ; CHECK-NEXT: internal %151.sub2:vreg_128 = PRED_COPY %150.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %157.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %155.sub2:vreg_128 = COPY %157.sub2 + ; CHECK-NEXT: undef %155.sub2:vreg_128 = PRED_COPY %157.sub2 ; CHECK-NEXT: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %156.sub0:vreg_128 = COPY %155.sub0 { - ; CHECK-NEXT: internal %156.sub2:vreg_128 = COPY %155.sub2 + ; CHECK-NEXT: undef %156.sub0:vreg_128 = PRED_COPY %155.sub0 { + ; CHECK-NEXT: internal %156.sub2:vreg_128 = PRED_COPY %155.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %165.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %163.sub2:vreg_128 = COPY %165.sub2 + ; CHECK-NEXT: undef %163.sub2:vreg_128 = PRED_COPY %165.sub2 ; CHECK-NEXT: %163.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %164.sub0:vreg_128 = COPY %163.sub0 { - ; CHECK-NEXT: internal %164.sub2:vreg_128 = COPY %163.sub2 + ; CHECK-NEXT: undef %164.sub0:vreg_128 = PRED_COPY %163.sub0 { + ; CHECK-NEXT: internal %164.sub2:vreg_128 = PRED_COPY %163.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %176.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %174.sub2:vreg_128 = COPY %176.sub2 + ; CHECK-NEXT: undef %174.sub2:vreg_128 = PRED_COPY %176.sub2 ; 
CHECK-NEXT: %174.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %175.sub0:vreg_128 = COPY %174.sub0 { - ; CHECK-NEXT: internal %175.sub2:vreg_128 = COPY %174.sub2 + ; CHECK-NEXT: undef %175.sub0:vreg_128 = PRED_COPY %174.sub0 { + ; CHECK-NEXT: internal %175.sub2:vreg_128 = PRED_COPY %174.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: undef %195.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: undef %180.sub2:vreg_128 = COPY %195.sub2 + ; CHECK-NEXT: undef %180.sub2:vreg_128 = PRED_COPY %195.sub2 ; CHECK-NEXT: %180.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec ; CHECK-NEXT: undef %194.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec - ; CHECK-NEXT: undef %193.sub2:vreg_128 = COPY %194.sub2 + ; CHECK-NEXT: undef %193.sub2:vreg_128 = PRED_COPY %194.sub2 ; CHECK-NEXT: %193.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec @@ -205,199 +205,199 @@ body: | ; CHECK-NEXT: %42.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK-NEXT: %43.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK-NEXT: %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: %43.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %43.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: %42.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %42.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %42.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %42.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: %41.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %41.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %41.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %41.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: %40.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %40.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %40.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %40.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: %38.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %38.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %38.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %38.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: %37.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %37.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %37.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %37.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: 
%36.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %36.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %36.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %36.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %191.sub0:vreg_128 = COPY %193.sub0 { - ; CHECK-NEXT: internal %191.sub2:vreg_128 = COPY %193.sub2 + ; CHECK-NEXT: undef %191.sub0:vreg_128 = PRED_COPY %193.sub0 { + ; CHECK-NEXT: internal %191.sub2:vreg_128 = PRED_COPY %193.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %191.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %191.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %191.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %191.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %191, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %178.sub0:vreg_128 = COPY %180.sub0 { - ; CHECK-NEXT: internal %178.sub2:vreg_128 = COPY %180.sub2 + ; CHECK-NEXT: undef %178.sub0:vreg_128 = PRED_COPY %180.sub0 { + ; CHECK-NEXT: internal %178.sub2:vreg_128 = PRED_COPY %180.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %178.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %178.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %178.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %178.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %178, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %172.sub0:vreg_128 = COPY %175.sub0 { - ; CHECK-NEXT: internal %172.sub2:vreg_128 = COPY %175.sub2 + ; CHECK-NEXT: undef %172.sub0:vreg_128 = PRED_COPY %175.sub0 { + ; CHECK-NEXT: internal %172.sub2:vreg_128 = PRED_COPY %175.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %172.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %172.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %172.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %172.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %172, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %161.sub0:vreg_128 = COPY %164.sub0 { - ; CHECK-NEXT: internal %161.sub2:vreg_128 = COPY %164.sub2 + ; CHECK-NEXT: undef %161.sub0:vreg_128 = PRED_COPY %164.sub0 { + ; CHECK-NEXT: internal %161.sub2:vreg_128 = PRED_COPY %164.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %161.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %161.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %161.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %161.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %161, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %153.sub0:vreg_128 = COPY %156.sub0 { - ; CHECK-NEXT: internal %153.sub2:vreg_128 = COPY %156.sub2 + ; CHECK-NEXT: undef %153.sub0:vreg_128 = PRED_COPY %156.sub0 { + ; CHECK-NEXT: internal %153.sub2:vreg_128 = PRED_COPY %156.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %153.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %153.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %153.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %153.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %148.sub0:vreg_128 = COPY %151.sub0 { - ; CHECK-NEXT: internal %148.sub2:vreg_128 = COPY %151.sub2 + ; CHECK-NEXT: undef %148.sub0:vreg_128 = PRED_COPY %151.sub0 { + ; CHECK-NEXT: internal %148.sub2:vreg_128 
= PRED_COPY %151.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %148.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %148.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %148.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %148.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %143.sub0:vreg_128 = COPY %146.sub0 { - ; CHECK-NEXT: internal %143.sub2:vreg_128 = COPY %146.sub2 + ; CHECK-NEXT: undef %143.sub0:vreg_128 = PRED_COPY %146.sub0 { + ; CHECK-NEXT: internal %143.sub2:vreg_128 = PRED_COPY %146.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %143.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %143.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %143.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %143.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %132.sub0:vreg_128 = COPY %135.sub0 { - ; CHECK-NEXT: internal %132.sub2:vreg_128 = COPY %135.sub2 + ; CHECK-NEXT: undef %132.sub0:vreg_128 = PRED_COPY %135.sub0 { + ; CHECK-NEXT: internal %132.sub2:vreg_128 = PRED_COPY %135.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %132.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %132.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %132.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %132.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %132, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - ; CHECK-NEXT: undef %127.sub0:vreg_128 = COPY %130.sub0 { - ; CHECK-NEXT: internal %127.sub2:vreg_128 = COPY %130.sub2 + ; CHECK-NEXT: undef %127.sub0:vreg_128 = PRED_COPY %130.sub0 { + ; CHECK-NEXT: internal %127.sub2:vreg_128 = PRED_COPY %130.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %127.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %127.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %127.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %127.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %127, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %125.sub0 { - ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %125.sub2 + ; CHECK-NEXT: undef %122.sub0:vreg_128 = PRED_COPY %125.sub0 { + ; CHECK-NEXT: internal %122.sub2:vreg_128 = PRED_COPY %125.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %122.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %122.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %122.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %122.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %117.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { - ; CHECK-NEXT: internal %117.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 + ; CHECK-NEXT: undef %117.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE]].sub0 { + ; CHECK-NEXT: internal %117.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %117.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %117.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %117.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %117.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: 
BUFFER_STORE_DWORDX4_OFFSET %117, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %112.sub0:vreg_128 = COPY %115.sub0 { - ; CHECK-NEXT: internal %112.sub2:vreg_128 = COPY %115.sub2 + ; CHECK-NEXT: undef %112.sub0:vreg_128 = PRED_COPY %115.sub0 { + ; CHECK-NEXT: internal %112.sub2:vreg_128 = PRED_COPY %115.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %112.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %112.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %112.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %112.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { - ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 + ; CHECK-NEXT: undef %110.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]].sub0 { + ; CHECK-NEXT: internal %110.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE1]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %110.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %110.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %110.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %110.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %110, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %184.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { - ; CHECK-NEXT: internal %184.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 + ; CHECK-NEXT: undef %184.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE2]].sub0 { + ; CHECK-NEXT: internal %184.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE2]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %184.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %184.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %184.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %184.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %184, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %137.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { - ; CHECK-NEXT: internal %137.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 + ; CHECK-NEXT: undef %137.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE3]].sub0 { + ; CHECK-NEXT: internal %137.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE3]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %137.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %137.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %137.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %137.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %137, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %103.sub0:vreg_128 = COPY %106.sub0 { - ; CHECK-NEXT: internal %103.sub2:vreg_128 = COPY %106.sub2 + ; CHECK-NEXT: undef %103.sub0:vreg_128 = PRED_COPY %106.sub0 { + ; CHECK-NEXT: internal %103.sub2:vreg_128 = PRED_COPY %106.sub2 ; 
CHECK-NEXT: } - ; CHECK-NEXT: %103.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %103.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %103.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %103.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %103, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %98.sub0:vreg_128 = COPY %101.sub0 { - ; CHECK-NEXT: internal %98.sub2:vreg_128 = COPY %101.sub2 + ; CHECK-NEXT: undef %98.sub0:vreg_128 = PRED_COPY %101.sub0 { + ; CHECK-NEXT: internal %98.sub2:vreg_128 = PRED_COPY %101.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %98.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %98.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %98.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %98.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %98, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %93.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { - ; CHECK-NEXT: internal %93.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 + ; CHECK-NEXT: undef %93.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE4]].sub0 { + ; CHECK-NEXT: internal %93.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE4]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %93.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %93.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %93.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %93.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %93, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %88.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 { - ; CHECK-NEXT: internal %88.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 + ; CHECK-NEXT: undef %88.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE5]].sub0 { + ; CHECK-NEXT: internal %88.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE5]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %88.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %88.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %88.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %88.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %88, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { - ; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 + ; CHECK-NEXT: undef %81.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE6]].sub0 { + ; CHECK-NEXT: internal %81.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE6]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %81.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %81.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %81.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %81.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %81, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK-NEXT: 
[[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %74.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { - ; CHECK-NEXT: internal %74.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 + ; CHECK-NEXT: undef %74.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE7]].sub0 { + ; CHECK-NEXT: internal %74.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE7]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %74.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %74.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %74.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %74.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %74, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %67.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { - ; CHECK-NEXT: internal %67.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 + ; CHECK-NEXT: undef %67.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE8]].sub0 { + ; CHECK-NEXT: internal %67.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE8]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %67.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %67.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %67.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %67.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %67, %2, 0, 32, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %60.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { - ; CHECK-NEXT: internal %60.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 + ; CHECK-NEXT: undef %60.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE9]].sub0 { + ; CHECK-NEXT: internal %60.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE9]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %60.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %60.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %60.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %60.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %60, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %53.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { - ; CHECK-NEXT: internal %53.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 + ; CHECK-NEXT: undef %53.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE10]].sub0 { + ; CHECK-NEXT: internal %53.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE10]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %53.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %53.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %53.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %53.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %53, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, 
align 4, addrspace 5) - ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { - ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 + ; CHECK-NEXT: undef %46.sub0:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE11]].sub0 { + ; CHECK-NEXT: internal %46.sub2:vreg_128 = PRED_COPY [[SI_SPILL_V128_RESTORE11]].sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %46.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %46.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %46.sub1:vreg_128 = PRED_COPY %43.sub1 + ; CHECK-NEXT: %46.sub3:vreg_128 = PRED_COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - %0:sgpr_64(p4) = COPY $sgpr0_sgpr1 + %0:sgpr_64(p4) = PRED_COPY $sgpr0_sgpr1 %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) undef %2.sub3:sgpr_128 = S_MOV_B32 61440 %2.sub2:sgpr_128 = S_MOV_B32 -1 - %2.sub0:sgpr_128 = COPY %1.sub0 - %2.sub1:sgpr_128 = COPY %1.sub1 - undef %3.sub0:sgpr_128 = COPY %1.sub2 - %3.sub1:sgpr_128 = COPY %1.sub3 - %3.sub2:sgpr_128 = COPY %2.sub2 - %3.sub3:sgpr_128 = COPY %2.sub3 + %2.sub0:sgpr_128 = PRED_COPY %1.sub0 + %2.sub1:sgpr_128 = PRED_COPY %1.sub1 + undef %3.sub0:sgpr_128 = PRED_COPY %1.sub2 + %3.sub1:sgpr_128 = PRED_COPY %1.sub3 + %3.sub2:sgpr_128 = PRED_COPY %2.sub2 + %3.sub3:sgpr_128 = PRED_COPY %2.sub3 early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) @@ -474,100 +474,100 @@ body: | %42.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub3, implicit $exec %43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec - %43.sub3:vreg_128 = COPY %43.sub1 + %43.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %42.sub1:vreg_128 = COPY %43.sub1 - %42.sub3:vreg_128 = COPY %43.sub1 + %42.sub1:vreg_128 = PRED_COPY %43.sub1 + %42.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %41.sub1:vreg_128 = COPY %43.sub1 - %41.sub3:vreg_128 = COPY %43.sub1 + %41.sub1:vreg_128 = PRED_COPY %43.sub1 + %41.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - %40.sub1:vreg_128 = COPY %43.sub1 - %40.sub3:vreg_128 = COPY %43.sub1 + %40.sub1:vreg_128 = PRED_COPY %43.sub1 + %40.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %38.sub1:vreg_128 = COPY %43.sub1 - %38.sub3:vreg_128 = COPY %43.sub1 + %38.sub1:vreg_128 = PRED_COPY %43.sub1 + %38.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %37.sub1:vreg_128 = COPY %43.sub1 - %37.sub3:vreg_128 = COPY %43.sub1 + %37.sub1:vreg_128 = PRED_COPY %43.sub1 + %37.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %36.sub1:vreg_128 = COPY %43.sub1 - %36.sub3:vreg_128 = COPY 
%43.sub1 + %36.sub1:vreg_128 = PRED_COPY %43.sub1 + %36.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - %35.sub1:vreg_128 = COPY %43.sub1 - %35.sub3:vreg_128 = COPY %43.sub1 + %35.sub1:vreg_128 = PRED_COPY %43.sub1 + %35.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %33.sub1:vreg_128 = COPY %43.sub1 - %33.sub3:vreg_128 = COPY %43.sub1 + %33.sub1:vreg_128 = PRED_COPY %43.sub1 + %33.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %32.sub1:vreg_128 = COPY %43.sub1 - %32.sub3:vreg_128 = COPY %43.sub1 + %32.sub1:vreg_128 = PRED_COPY %43.sub1 + %32.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %31.sub1:vreg_128 = COPY %43.sub1 - %31.sub3:vreg_128 = COPY %43.sub1 + %31.sub1:vreg_128 = PRED_COPY %43.sub1 + %31.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - %30.sub1:vreg_128 = COPY %43.sub1 - %30.sub3:vreg_128 = COPY %43.sub1 + %30.sub1:vreg_128 = PRED_COPY %43.sub1 + %30.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %28.sub1:vreg_128 = COPY %43.sub1 - %28.sub3:vreg_128 = COPY %43.sub1 + %28.sub1:vreg_128 = PRED_COPY %43.sub1 + %28.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %27.sub1:vreg_128 = COPY %43.sub1 - %27.sub3:vreg_128 = COPY %43.sub1 + %27.sub1:vreg_128 = PRED_COPY %43.sub1 + %27.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %26.sub1:vreg_128 = COPY %43.sub1 - %26.sub3:vreg_128 = COPY %43.sub1 + %26.sub1:vreg_128 = PRED_COPY %43.sub1 + %26.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - %25.sub1:vreg_128 = COPY %43.sub1 - %25.sub3:vreg_128 = COPY %43.sub1 + %25.sub1:vreg_128 = PRED_COPY %43.sub1 + %25.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %23.sub1:vreg_128 = COPY %43.sub1 - %23.sub3:vreg_128 = COPY %43.sub1 + %23.sub1:vreg_128 = PRED_COPY %43.sub1 + %23.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %22.sub1:vreg_128 = COPY %43.sub1 - %22.sub3:vreg_128 = COPY %43.sub1 + %22.sub1:vreg_128 = PRED_COPY %43.sub1 + %22.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %21.sub1:vreg_128 = COPY %43.sub1 - %21.sub3:vreg_128 = COPY %43.sub1 + %21.sub1:vreg_128 = PRED_COPY %43.sub1 + %21.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - %20.sub1:vreg_128 = COPY %43.sub1 - %20.sub3:vreg_128 = COPY %43.sub1 + %20.sub1:vreg_128 = PRED_COPY %43.sub1 + %20.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), 
addrspace 1) - %19.sub1:vreg_128 = COPY %43.sub1 - %19.sub3:vreg_128 = COPY %43.sub1 + %19.sub1:vreg_128 = PRED_COPY %43.sub1 + %19.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %18.sub1:vreg_128 = COPY %43.sub1 - %18.sub3:vreg_128 = COPY %43.sub1 + %18.sub1:vreg_128 = PRED_COPY %43.sub1 + %18.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %17.sub1:vreg_128 = COPY %43.sub1 - %17.sub3:vreg_128 = COPY %43.sub1 + %17.sub1:vreg_128 = PRED_COPY %43.sub1 + %17.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - %16.sub1:vreg_128 = COPY %43.sub1 - %16.sub3:vreg_128 = COPY %43.sub1 + %16.sub1:vreg_128 = PRED_COPY %43.sub1 + %16.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %15.sub1:vreg_128 = COPY %43.sub1 - %15.sub3:vreg_128 = COPY %43.sub1 + %15.sub1:vreg_128 = PRED_COPY %43.sub1 + %15.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %14.sub1:vreg_128 = COPY %43.sub1 - %14.sub3:vreg_128 = COPY %43.sub1 + %14.sub1:vreg_128 = PRED_COPY %43.sub1 + %14.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %13.sub1:vreg_128 = COPY %43.sub1 - %13.sub3:vreg_128 = COPY %43.sub1 + %13.sub1:vreg_128 = PRED_COPY %43.sub1 + %13.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - %12.sub1:vreg_128 = COPY %43.sub1 - %12.sub3:vreg_128 = COPY %43.sub1 + %12.sub1:vreg_128 = PRED_COPY %43.sub1 + %12.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %11.sub1:vreg_128 = COPY %43.sub1 - %11.sub3:vreg_128 = COPY %43.sub1 + %11.sub1:vreg_128 = PRED_COPY %43.sub1 + %11.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - %10.sub1:vreg_128 = COPY %43.sub1 - %10.sub3:vreg_128 = COPY %43.sub1 + %10.sub1:vreg_128 = PRED_COPY %43.sub1 + %10.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - %9.sub1:vreg_128 = COPY %43.sub1 - %9.sub3:vreg_128 = COPY %43.sub1 + %9.sub1:vreg_128 = PRED_COPY %43.sub1 + %9.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) - %8.sub1:vreg_128 = COPY %43.sub1 - %8.sub3:vreg_128 = COPY %43.sub1 + %8.sub1:vreg_128 = PRED_COPY %43.sub1 + %8.sub3:vreg_128 = PRED_COPY %43.sub1 BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index 9551c2e8dfd83..a86e1f18ff3d1 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -15,30 +15,30 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: bb.0..expVert: ; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %56.sub0:sgpr_64 = COPY $sgpr31 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18 - ; CHECK-NEXT: undef %50.sub0:sgpr_64 = COPY $sgpr19 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8 + ; CHECK-NEXT: undef %56.sub0:sgpr_64 = PRED_COPY $sgpr31 + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr27 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr25 + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr5 + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr4 + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr3 + ; CHECK-NEXT: [[PRED_COPY5:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr18 + ; CHECK-NEXT: undef %50.sub0:sgpr_64 = PRED_COPY $sgpr19 + ; CHECK-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr20 + ; CHECK-NEXT: [[PRED_COPY7:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr21 + ; CHECK-NEXT: [[PRED_COPY8:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr22 + ; CHECK-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr23 + ; CHECK-NEXT: [[PRED_COPY10:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr9 + ; CHECK-NEXT: [[PRED_COPY11:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr10 + ; CHECK-NEXT: [[PRED_COPY12:%[0-9]+]]:sgpr_32 = PRED_COPY $sgpr8 ; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (invariant load (s64) from %ir.40, addrspace 4) - ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY4]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY3]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 4, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc ; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def 
dead $scc - ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (invariant load (s128) from %ir.84, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `<4 x i32> addrspace(4)* undef`, addrspace 4) @@ -49,13 +49,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: undef %302.sub1:sgpr_128 = S_MOV_B32 0 - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: KILL undef %89:sgpr_128 ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc ; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc ; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY5]], 64, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc @@ -89,11 +89,11 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc ; CHECK-NEXT: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY6]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: 
%273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY7]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY7]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc @@ -101,7 +101,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %302, undef %314:sreg_32, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %302, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %302, 16, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4) @@ -109,35 +109,35 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (invariant load (s128) from %ir.111, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (invariant load (s128) from %ir.123, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, 
[[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc - ; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY8]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc + ; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc + ; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (invariant load (s128) from %ir.131, addrspace 4) ; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc + ; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[PRED_COPY9]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY10]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, 
addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (invariant load (s128) from %ir.155, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (invariant load (s128) from %ir.138, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (invariant load (s128) from %ir.144, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (invariant load (s128) from %ir.150, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc @@ -146,102 +146,102 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -329, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -345, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR6]], -441, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY1]], 160, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef 
%36:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY11]], 4, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc ; CHECK-NEXT: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc ; CHECK-NEXT: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (invariant load (s128) from %ir.162, addrspace 4) - ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY4]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc ; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc ; CHECK-NEXT: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (invariant load (s32) from %ir..i085.i, align 8, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (invariant load (s128) from %ir.176, 
addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY13:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (invariant load (s128) from %ir.185, addrspace 4) - ; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1 - ; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[PRED_COPY13]].sub1:sgpr_128 = PRED_COPY %302.sub1 + ; CHECK-NEXT: [[PRED_COPY13]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY13]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (invariant load (s128) from %ir.194, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (invariant load (s128) from %ir.200, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc - ; CHECK-NEXT: 
[[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY3]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc ; CHECK-NEXT: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc ; CHECK-NEXT: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (invariant load (s64) from %ir.308, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (invariant load (s128) from %ir.223, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (invariant load (s128) from %ir.230, addrspace 4) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY14:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (invariant load (s128) from %ir.236, addrspace 4) ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 - ; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY14]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: [[PRED_COPY14]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY14]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: 
(invariant load (s128) from %ir.242, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY2]], 3, implicit-def dead $scc + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc ; CHECK-NEXT: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc ; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (invariant load (s64) from %ir.320, addrspace 4) - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71 + ; CHECK-NEXT: [[PRED_COPY15:%[0-9]+]]:sgpr_128 = PRED_COPY %71 ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0 - ; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY15]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORDX2_IMM1]].sub0 + ; CHECK-NEXT: [[PRED_COPY15]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_1]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM 
[[PRED_COPY15]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (invariant load (s128) from %ir.282, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: KILL %411.sub0, %411.sub1 ; CHECK-NEXT: KILL undef %488:sreg_64 - ; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3 - ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc + ; CHECK-NEXT: KILL [[PRED_COPY15]].sub0_sub1, [[PRED_COPY15]].sub2_sub3 + ; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[PRED_COPY12]], 3, implicit-def dead $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.291, addrspace 4) ; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc ; CHECK-NEXT: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc ; CHECK-NEXT: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (invariant load (s32) from %ir..i0100.i, align 8, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY %71 - ; CHECK-NEXT: [[COPY16]].sub1:sgpr_128 = COPY [[S_AND_B32_2]] - ; CHECK-NEXT: [[COPY16]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]] - ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[PRED_COPY16:%[0-9]+]]:sgpr_128 = PRED_COPY %71 + ; CHECK-NEXT: [[PRED_COPY16]].sub1:sgpr_128 = PRED_COPY [[S_AND_B32_2]] + ; CHECK-NEXT: [[PRED_COPY16]].sub0:sgpr_128 = PRED_COPY [[S_LOAD_DWORD_IMM2]] + ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[PRED_COPY16]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc ; 
CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc ; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc + ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PRED_COPY]], 96, implicit-def $scc ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc @@ -252,9 +252,9 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK-NEXT: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (invariant load (s128) from %ir.363, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) - ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) + ; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7) ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]] @@ -376,7 +376,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec ; CHECK-NEXT: undef %693.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, 
[[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 %693, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "ImageResource") + ; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 %693, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir index 8f4de729388c9..096749d2e30f6 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir @@ -34,13 +34,13 @@ body: | # allocated to sgpr0_sgpr1 and the first to something else so we see two copies # in between for the two subregisters that are alive. # CHECK-LABEL: name: func1 -# CHECK: [[REG0:\$sgpr[0-9]+]] = COPY $sgpr0 -# CHECK: [[REG1:\$sgpr[0-9]+]] = COPY $sgpr2 +# CHECK: [[REG0:\$sgpr[0-9]+]] = PRED_COPY $sgpr0 +# CHECK: [[REG1:\$sgpr[0-9]+]] = PRED_COPY $sgpr2 # CHECK: S_NOP 0 # CHECK: S_NOP 0, implicit renamable [[REG0]] # CHECK: S_NOP 0, implicit renamable [[REG1]] -# CHECK: $sgpr0 = COPY killed renamable [[REG0]] -# CHECK: $sgpr2 = COPY renamable [[REG1]] +# CHECK: $sgpr0 = PRED_COPY killed renamable [[REG0]] +# CHECK: $sgpr2 = PRED_COPY renamable [[REG1]] # CHECK: S_NOP # CHECK: S_NOP 0, implicit renamable $sgpr0 # CHECK: S_NOP 0, implicit killed renamable $sgpr2 @@ -49,8 +49,8 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 - undef %0.sub0 : sgpr_128 = COPY $sgpr0 - %0.sub2 = COPY $sgpr2 + undef %0.sub0 : sgpr_128 = PRED_COPY $sgpr0 + %0.sub2 = PRED_COPY $sgpr2 S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 8fdf6d1683ebb..454c61183e33f 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_mul_lo_u32 v4, s1, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_mul_lo_u32 v3, s0, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -104,19 +104,19 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v5, s11 +; GCN-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, 
vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; @@ -124,41 +124,38 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: s_flbit_i32_b32 s10, s4 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s11, s5 -; GCN-IR-NEXT: s_add_i32 s10, s10, 32 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; GCN-IR-NEXT: s_min_u32 s10, s10, s11 -; GCN-IR-NEXT: s_min_u32 s14, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s12, s10, s14 -; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[8:9], s[16:17] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[16:17], exec -; GCN-IR-NEXT: s_cselect_b32 s9, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s8, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 +; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s12, s4 +; GCN-IR-NEXT: s_add_i32 s14, s12, 32 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[10:11] +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s5 +; GCN-IR-NEXT: s_min_u32 s10, s14, s8 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 +; GCN-IR-NEXT: s_min_u32 s14, s8, s9 +; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 +; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s16, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s17, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[16:17], 0 -; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 +; GCN-IR-NEXT: s_add_u32 s12, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s16 +; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s12 ; GCN-IR-NEXT: s_add_u32 s16, s4, -1 ; GCN-IR-NEXT: s_addc_u32 s17, s5, -1 ; GCN-IR-NEXT: s_not_b64 s[6:7], s[10:11] @@ -189,24 +186,30 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GCN-IR-NEXT: .LBB0_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-IR-NEXT: s_mov_b32 
s12, s0 -; GCN-IR-NEXT: s_mul_i32 s0, s4, s9 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 +; GCN-IR-NEXT: s_branch .LBB0_6 +; GCN-IR-NEXT: .LBB0_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[12:13] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[12:13] +; GCN-IR-NEXT: .LBB0_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s4, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s4, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 +; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s5, s8 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s4, s8 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 -; GCN-IR-NEXT: s_mov_b32 s15, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s14, -1 -; GCN-IR-NEXT: s_mov_b32 s13, s1 +; GCN-IR-NEXT: s_mov_b32 s10, -1 +; GCN-IR-NEXT: s_mov_b32 s8, s0 +; GCN-IR-NEXT: s_mov_b32 s9, s1 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GCN-IR-NEXT: s_endpgm %result = urem i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -448,72 +451,66 @@ define i64 @v_test_srem(i64 %x, i64 %y) { define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem23_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 41 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 41 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 41 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 41 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-NEXT: s_xor_b32 s3, s2, s4 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-NEXT: s_xor_b32 s5, s4, s8 -; GCN-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-NEXT: s_or_b32 s5, s5, 1 +; GCN-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-NEXT: s_or_b32 s3, s3, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-NEXT: s_add_i32 s5, s6, s5 -; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: s_sub_i32 s4, s4, s5 -; GCN-NEXT: s_bfe_i32 s4, s4, 0x170000 -; GCN-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: 
v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 23 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem23_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 41 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-IR-NEXT: s_mov_b32 s1, s5 -; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 41 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[4:5], 41 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 41 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-IR-NEXT: s_xor_b32 s3, s2, s4 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-IR-NEXT: s_xor_b32 s5, s4, s8 -; GCN-IR-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-IR-NEXT: s_or_b32 s5, s5, 1 +; GCN-IR-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-IR-NEXT: s_or_b32 s3, s3, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s3 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-IR-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-IR-NEXT: s_add_i32 s5, s6, s5 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_sub_i32 s4, s4, s5 -; GCN-IR-NEXT: s_bfe_i32 s4, s4, 0x170000 -; GCN-IR-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i64 %x, 41 %2 = ashr i64 %y, 41 @@ -525,72 +522,66 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem24_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 40 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 40 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 40 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-NEXT: s_xor_b32 s3, s2, s4 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-NEXT: s_xor_b32 
s5, s4, s8 -; GCN-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-NEXT: s_or_b32 s5, s5, 1 +; GCN-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-NEXT: s_or_b32 s3, s3, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-NEXT: s_add_i32 s5, s6, s5 -; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: s_sub_i32 s4, s4, s5 -; GCN-NEXT: s_bfe_i32 s4, s4, 0x180000 -; GCN-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem24_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 40 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-IR-NEXT: s_mov_b32 s1, s5 -; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 40 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[4:5], 40 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-IR-NEXT: s_xor_b32 s3, s2, s4 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-IR-NEXT: s_xor_b32 s5, s4, s8 -; GCN-IR-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-IR-NEXT: s_or_b32 s5, s5, 1 +; GCN-IR-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-IR-NEXT: s_or_b32 s3, s3, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s3 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-IR-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-IR-NEXT: s_add_i32 s5, s6, s5 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_sub_i32 s4, s4, s5 -; GCN-IR-NEXT: s_bfe_i32 s4, s4, 0x180000 -; GCN-IR-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i64 %x, 40 %2 = ashr i64 %y, 40 @@ -656,72 
+647,66 @@ define i64 @v_test_srem24_64(i64 %x, i64 %y) { define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem25_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 39 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 39 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 39 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 39 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-NEXT: s_xor_b32 s3, s2, s4 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-NEXT: s_xor_b32 s5, s4, s8 -; GCN-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-NEXT: s_or_b32 s5, s5, 1 +; GCN-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-NEXT: s_or_b32 s3, s3, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-NEXT: s_add_i32 s5, s6, s5 -; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: s_sub_i32 s4, s4, s5 -; GCN-NEXT: s_bfe_i32 s4, s4, 0x190000 -; GCN-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 25 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem25_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 39 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-IR-NEXT: s_mov_b32 s1, s5 -; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 39 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[4:5], 39 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 39 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-IR-NEXT: s_xor_b32 s3, s2, s4 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-IR-NEXT: s_xor_b32 s5, s4, s8 -; GCN-IR-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-IR-NEXT: s_or_b32 s5, s5, 1 +; GCN-IR-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-IR-NEXT: s_or_b32 s3, s3, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s3 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: 
v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-IR-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-IR-NEXT: s_add_i32 s5, s6, s5 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_sub_i32 s4, s4, s5 -; GCN-IR-NEXT: s_bfe_i32 s4, s4, 0x190000 -; GCN-IR-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i64 %x, 39 %2 = ashr i64 %y, 39 @@ -733,72 +718,66 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem31_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-NEXT: s_mov_b32 s3, 0xf000 -; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 33 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 33 -; GCN-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 33 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 33 +; GCN-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-NEXT: s_xor_b32 s3, s2, s4 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-NEXT: s_xor_b32 s5, s4, s8 -; GCN-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-NEXT: s_or_b32 s5, s5, 1 +; GCN-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-NEXT: s_or_b32 s3, s3, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-NEXT: s_add_i32 s5, s6, s5 -; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: s_sub_i32 s4, s4, s5 -; GCN-NEXT: s_bfe_i32 s4, s4, 0x1f0000 -; GCN-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: v_mov_b32_e32 v1, s5 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 31 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem31_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe -; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s2, -1 +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe +; 
GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 33 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 -; GCN-IR-NEXT: s_mov_b32 s1, s5 -; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 33 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s4 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[4:5], 33 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 33 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s2 +; GCN-IR-NEXT: s_xor_b32 s3, s2, s4 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 -; GCN-IR-NEXT: s_xor_b32 s5, s4, s8 -; GCN-IR-NEXT: s_ashr_i32 s5, s5, 30 -; GCN-IR-NEXT: s_or_b32 s5, s5, 1 +; GCN-IR-NEXT: s_ashr_i32 s3, s3, 30 +; GCN-IR-NEXT: s_or_b32 s3, s3, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s3 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[6:7], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec -; GCN-IR-NEXT: s_cselect_b32 s5, s5, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s6, v2 -; GCN-IR-NEXT: s_add_i32 s5, s6, s5 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_sub_i32 s4, s4, s5 -; GCN-IR-NEXT: s_bfe_i32 s4, s4, 0x1f0000 -; GCN-IR-NEXT: s_ashr_i32 s5, s4, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: s_mov_b32 s5, s1 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i64 %x, 33 %2 = ashr i64 %y, 33 @@ -811,28 +790,28 @@ define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem32_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s8, s[0:1], 0xe +; GCN-NEXT: s_load_dword s4, s[0:1], 0xe ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 ; GCN-NEXT: v_cvt_f32_i32_e32 v1, s3 -; GCN-NEXT: s_xor_b32 s2, s3, s8 +; GCN-NEXT: s_xor_b32 s2, s3, s4 ; GCN-NEXT: s_ashr_i32 s2, s2, 30 ; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-NEXT: s_or_b32 s2, s2, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s2 +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-NEXT: s_cselect_b32 s2, s2, 0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, s2, v2 -; GCN-NEXT: v_mul_lo_u32 v0, v0, s8 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-NEXT: s_mov_b32 s4, s0 -; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s3, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 
v[0:1], off, s[4:7], 0 @@ -840,28 +819,28 @@ define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64 ; ; GCN-IR-LABEL: s_test_srem32_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dword s8, s[0:1], 0xe +; GCN-IR-NEXT: s_load_dword s4, s[0:1], 0xe ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v1, s3 -; GCN-IR-NEXT: s_xor_b32 s2, s3, s8 +; GCN-IR-NEXT: s_xor_b32 s2, s3, s4 ; GCN-IR-NEXT: s_ashr_i32 s2, s2, 30 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v2, v0 ; GCN-IR-NEXT: s_or_b32 s2, s2, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s2 +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_mul_f32_e32 v2, v1, v2 ; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-IR-NEXT: v_mad_f32 v1, -v2, v0, v1 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, |v0| -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s2, s2, 0 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s2, v2 -; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s8 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 -; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -932,7 +911,7 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_mul_lo_u32 v4, s1, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_mul_lo_u32 v3, s0, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -988,19 +967,19 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v4, s15 -; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v5, s15 +; GCN-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: v_xor_b32_e32 v0, s6, v0 ; GCN-NEXT: v_xor_b32_e32 v1, s6, v1 ; GCN-NEXT: v_mov_b32_e32 v2, s6 @@ -1013,7 +992,6 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GCN-IR-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xd -; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[6:7], 31 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[0:1], 31 @@ -1027,39 +1005,37 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s0 ; GCN-IR-NEXT: s_sub_u32 s8, s6, s10 ; GCN-IR-NEXT: s_subb_u32 s9, s7, s10 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[8:9], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s8 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s9 -; GCN-IR-NEXT: s_min_u32 s12, s6, s7 -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; GCN-IR-NEXT: s_min_u32 s16, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s14, s12, s16 -; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[14:15], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[14:15], 63 -; GCN-IR-NEXT: s_or_b64 s[18:19], s[10:11], s[18:19] -; GCN-IR-NEXT: s_and_b64 s[10:11], s[18:19], exec -; GCN-IR-NEXT: s_cselect_b32 s11, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s10, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[18:19], s[18:19], s[20:21] +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 +; GCN-IR-NEXT: s_or_b64 s[14:15], s[10:11], s[12:13] +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s8 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s9 +; GCN-IR-NEXT: s_min_u32 s12, s10, s11 +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s2 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s3 +; GCN-IR-NEXT: s_min_u32 s16, s10, s11 +; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 +; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 +; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s18, s14, 1 -; GCN-IR-NEXT: s_addc_u32 s19, s15, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[18:19], 0 -; GCN-IR-NEXT: s_sub_i32 s14, 63, s14 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[2:3], s14 +; GCN-IR-NEXT: s_add_u32 s14, s10, 1 +; GCN-IR-NEXT: s_addc_u32 s15, s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 +; GCN-IR-NEXT: s_sub_i32 s10, 63, s10 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[2:3], s10 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[14:15], s[2:3], s18 +; GCN-IR-NEXT: s_lshr_b64 s[14:15], s[2:3], s14 ; GCN-IR-NEXT: s_add_u32 s18, s8, -1 ; GCN-IR-NEXT: s_addc_u32 s19, s9, -1 ; GCN-IR-NEXT: s_not_b64 s[6:7], s[12:13] @@ -1090,25 +1066,32 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: .LBB8_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s10 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s8, v0 -; GCN-IR-NEXT: s_mul_i32 s11, s8, s11 -; GCN-IR-NEXT: s_mul_i32 
s9, s9, s10 -; GCN-IR-NEXT: s_mul_i32 s8, s8, s10 -; GCN-IR-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-IR-NEXT: s_add_i32 s11, s12, s11 -; GCN-IR-NEXT: s_add_i32 s11, s11, s9 -; GCN-IR-NEXT: s_sub_u32 s2, s2, s8 -; GCN-IR-NEXT: s_subb_u32 s3, s3, s11 -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] -; GCN-IR-NEXT: s_sub_u32 s0, s2, s0 -; GCN-IR-NEXT: s_subb_u32 s1, s3, s1 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 +; GCN-IR-NEXT: s_branch .LBB8_6 +; GCN-IR-NEXT: .LBB8_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[14:15] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[14:15] +; GCN-IR-NEXT: .LBB8_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s8, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s8, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s9, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s8, v0 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, s0, v0 +; GCN-IR-NEXT: v_xor_b32_e32 v1, s1, v1 +; GCN-IR-NEXT: v_mov_b32_e32 v2, s1 +; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 ; GCN-IR-NEXT: s_mov_b32 s6, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s1 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i64 %x, 31 @@ -1145,10 +1128,10 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v1| ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v4 ; GCN-NEXT: v_mul_lo_u32 v0, v1, v0 ; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: v_subrev_i32_e32 v0, vcc, v0, v2 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1158,7 +1141,6 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-LABEL: s_test_srem24_48: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_sext_i32_i16 s5, s5 ; GCN-IR-NEXT: s_sext_i32_i16 s7, s7 @@ -1178,39 +1160,37 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_subb_u32 s5, s5, s2 ; GCN-IR-NEXT: s_sub_u32 s6, s6, s10 ; GCN-IR-NEXT: s_subb_u32 s7, s7, s10 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[6:7], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s6 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s7 -; GCN-IR-NEXT: s_min_u32 s12, s8, s9 -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s4 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s5 -; GCN-IR-NEXT: s_min_u32 s16, s8, s9 -; GCN-IR-NEXT: s_sub_u32 s14, s12, s16 -; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[14:15], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[14:15], 63 -; GCN-IR-NEXT: s_or_b64 s[18:19], s[10:11], s[18:19] -; GCN-IR-NEXT: s_and_b64 s[10:11], s[18:19], 
exec -; GCN-IR-NEXT: s_cselect_b32 s11, 0, s5 -; GCN-IR-NEXT: s_cselect_b32 s10, 0, s4 -; GCN-IR-NEXT: s_or_b64 s[18:19], s[18:19], s[20:21] +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[4:5], 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: s_or_b64 s[14:15], s[10:11], s[12:13] +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s6 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s7 +; GCN-IR-NEXT: s_min_u32 s12, s10, s11 +; GCN-IR-NEXT: s_flbit_i32_b32 s10, s4 +; GCN-IR-NEXT: s_add_i32 s10, s10, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s11, s5 +; GCN-IR-NEXT: s_min_u32 s16, s10, s11 +; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 +; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 +; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s18, s14, 1 -; GCN-IR-NEXT: s_addc_u32 s19, s15, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[18:19], 0 -; GCN-IR-NEXT: s_sub_i32 s14, 63, s14 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[4:5], s14 +; GCN-IR-NEXT: s_add_u32 s14, s10, 1 +; GCN-IR-NEXT: s_addc_u32 s15, s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 +; GCN-IR-NEXT: s_sub_i32 s10, 63, s10 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[14:15], s[4:5], s18 +; GCN-IR-NEXT: s_lshr_b64 s[14:15], s[4:5], s14 ; GCN-IR-NEXT: s_add_u32 s18, s6, -1 ; GCN-IR-NEXT: s_addc_u32 s19, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[8:9], s[12:13] @@ -1241,30 +1221,36 @@ define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_3 ; GCN-IR-NEXT: .LBB9_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-IR-NEXT: .LBB9_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s10 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s6, v0 -; GCN-IR-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x9 -; GCN-IR-NEXT: s_mul_i32 s0, s6, s11 +; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s9 +; GCN-IR-NEXT: s_branch .LBB9_6 +; GCN-IR-NEXT: .LBB9_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s5 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[14:15] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[14:15] +; GCN-IR-NEXT: .LBB9_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s6, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s6, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s7, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s6, v0 +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s5 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s7, s10 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s6, s10 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s0 -; GCN-IR-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 -; GCN-IR-NEXT: v_subb_u32_e32 v0, vcc, v2, v0, vcc -; GCN-IR-NEXT: v_xor_b32_e32 v1, s2, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, s3, v0 +; 
GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, s2, v0 +; GCN-IR-NEXT: v_xor_b32_e32 v1, s3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 -; GCN-IR-NEXT: v_subrev_i32_e32 v1, vcc, s2, v1 -; GCN-IR-NEXT: s_mov_b32 s15, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s14, -1 -; GCN-IR-NEXT: v_subb_u32_e32 v0, vcc, v0, v2, vcc +; GCN-IR-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s10, -1 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: buffer_store_short v0, off, s[12:15], 0 offset:4 -; GCN-IR-NEXT: buffer_store_dword v1, off, s[12:15], 0 +; GCN-IR-NEXT: buffer_store_short v1, off, s[8:11], 0 offset:4 +; GCN-IR-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GCN-IR-NEXT: s_endpgm %1 = ashr i48 %x, 24 %2 = ashr i48 %y, 24 @@ -1324,9 +1310,9 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_lo_u32 v2, s2, v1 ; GCN-NEXT: v_mul_hi_u32 v3, s2, v0 ; GCN-NEXT: v_mul_lo_u32 v4, s3, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_mul_lo_u32 v3, s2, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -1370,52 +1356,49 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_ashr_i32 s8, s3, 31 -; GCN-IR-NEXT: s_mov_b32 s9, s8 -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s2, s8 -; GCN-IR-NEXT: s_subb_u32 s5, s3, s8 -; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4 -; GCN-IR-NEXT: s_add_i32 s2, s2, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5 -; GCN-IR-NEXT: s_min_u32 s8, s2, s3 -; GCN-IR-NEXT: s_add_u32 s2, s8, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[2:3], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[2:3], 63 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[10:11], s[12:13] -; GCN-IR-NEXT: s_and_b64 s[10:11], s[12:13], exec -; GCN-IR-NEXT: s_cselect_b32 s10, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; 
GCN-IR-NEXT: s_ashr_i32 s6, s3, 31 +; GCN-IR-NEXT: s_mov_b32 s7, s6 +; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, s2, s6 +; GCN-IR-NEXT: s_subb_u32 s5, s3, s6 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 +; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3] +; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s10, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s3, 0 +; GCN-IR-NEXT: s_add_u32 s10, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s11, s7, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[10:11], 0 -; GCN-IR-NEXT: s_sub_i32 s2, 63, s2 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] -; GCN-IR-NEXT: s_lshl_b64 s[2:3], 24, s2 +; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 @@ -1424,41 +1407,46 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_sub_u32 s8, 58, s8 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 -; GCN-IR-NEXT: s_mov_b32 s7, 0 +; GCN-IR-NEXT: s_mov_b32 s3, 0 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 -; GCN-IR-NEXT: s_lshr_b32 s6, s3, 31 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[6:7] -; GCN-IR-NEXT: s_or_b64 s[2:3], s[12:13], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s6, s14, s10 -; GCN-IR-NEXT: s_subb_u32 s6, s15, s11 -; GCN-IR-NEXT: s_ashr_i32 s12, s6, 31 +; GCN-IR-NEXT: s_lshr_b32 s2, s7, 31 +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[2:3] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s2, s14, s10 +; GCN-IR-NEXT: s_subb_u32 s2, s15, s11 +; GCN-IR-NEXT: s_ashr_i32 s12, s2, 31 ; GCN-IR-NEXT: s_mov_b32 s13, s12 -; GCN-IR-NEXT: s_and_b32 s6, s12, 1 +; GCN-IR-NEXT: s_and_b32 s2, s12, 1 ; GCN-IR-NEXT: s_and_b64 s[12:13], s[12:13], s[4:5] ; GCN-IR-NEXT: s_sub_u32 s10, s10, s12 ; GCN-IR-NEXT: s_subb_u32 s11, s11, s13 ; GCN-IR-NEXT: s_add_u32 s8, s8, 1 ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7] +; GCN-IR-NEXT: s_mov_b64 s[12:13], s[2:3] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow5 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[2:3] -; GCN-IR-NEXT: .LBB10_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s10 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-IR-NEXT: s_mul_i32 s6, s4, s11 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s10 -; GCN-IR-NEXT: s_mul_i32 s4, s4, s10 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s6, v0 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s5, v0 -; GCN-IR-NEXT: v_sub_i32_e64 v0, vcc, 24, s4 +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] +; GCN-IR-NEXT: 
v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB10_6 +; GCN-IR-NEXT: .LBB10_5: +; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 24, 0, s[10:11] +; GCN-IR-NEXT: .LBB10_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s4, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s4, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1963,65 +1951,55 @@ define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 % ; GCN-LABEL: s_test_srem24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s7, 0xf000 -; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 -; GCN-NEXT: v_cvt_f32_i32_e32 v0, s2 -; GCN-NEXT: s_mov_b32 s3, 0x41c00000 -; GCN-NEXT: s_mov_b32 s4, s0 -; GCN-NEXT: s_ashr_i32 s0, s2, 30 +; GCN-NEXT: s_ashr_i64 s[4:5], s[2:3], 40 +; GCN-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-NEXT: s_mov_b32 s5, 0x41c00000 +; GCN-NEXT: s_ashr_i32 s6, s4, 30 +; GCN-NEXT: s_or_b32 s6, s6, 1 ; GCN-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: s_or_b32 s8, s0, 1 +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mul_f32_e32 v1, 0x41c00000, v1 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mad_f32 v2, -v1, v0, s3 +; GCN-NEXT: v_mad_f32 v2, -v1, v0, s5 ; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s8, 0 -; GCN-NEXT: v_readfirstlane_b32 s1, v1 -; GCN-NEXT: s_add_i32 s0, s1, s0 -; GCN-NEXT: s_mul_i32 s0, s0, s2 -; GCN-NEXT: s_sub_i32 s0, 24, s0 -; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GCN-NEXT: s_ashr_i32 s1, s0, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem24_k_num_i64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 -; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s2 -; GCN-IR-NEXT: s_mov_b32 s3, 0x41c00000 -; GCN-IR-NEXT: s_mov_b32 s4, s0 -; GCN-IR-NEXT: s_ashr_i32 s0, s2, 30 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[2:3], 40 +; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s4 +; GCN-IR-NEXT: s_mov_b32 s5, 0x41c00000 +; GCN-IR-NEXT: s_ashr_i32 s6, s4, 30 +; GCN-IR-NEXT: s_or_b32 s6, s6, 1 ; GCN-IR-NEXT: v_rcp_iflag_f32_e32 v1, v0 -; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: s_or_b32 s8, s0, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v3, s6 +; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x41c00000, v1 ; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 
-; GCN-IR-NEXT: v_mad_f32 v2, -v1, v0, s3 +; GCN-IR-NEXT: v_mad_f32 v2, -v1, v0, s5 ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v2|, |v0| -; GCN-IR-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-IR-NEXT: s_cselect_b32 s0, s8, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s1, v1 -; GCN-IR-NEXT: s_add_i32 s0, s1, s0 -; GCN-IR-NEXT: s_mul_i32 s0, s0, s2 -; GCN-IR-NEXT: s_sub_i32 s0, 24, s0 -; GCN-IR-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GCN-IR-NEXT: s_ashr_i32 s1, s0, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s1 -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %x.shr = ashr i64 %x, 40 %result = srem i64 24, %x.shr @@ -2033,62 +2011,58 @@ define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 % ; GCN-LABEL: s_test_srem24_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s8, 0x46b6fe00 +; GCN-NEXT: s_mov_b32 s4, 0x46b6fe00 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GCN-NEXT: s_ashr_i32 s3, s2, 30 +; GCN-NEXT: s_or_b32 s3, s3, 1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_mul_f32_e32 v2, 0x38331158, v0 +; GCN-NEXT: v_trunc_f32_e32 v2, v2 +; GCN-NEXT: v_mad_f32 v0, -v2, s4, v0 +; GCN-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GCN-NEXT: s_movk_i32 s3, 0x5b7f +; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-NEXT: s_mov_b32 s4, s0 -; GCN-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 -; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mad_f32 v0, -v1, s8, v0 -; GCN-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-NEXT: s_or_b32 s3, s0, 1 -; GCN-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, s8 -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s3, 0 -; GCN-NEXT: v_readfirstlane_b32 s1, v1 -; GCN-NEXT: s_add_i32 s0, s1, s0 -; GCN-NEXT: s_mulk_i32 s0, 0x5b7f -; GCN-NEXT: s_sub_i32 s0, s2, s0 -; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GCN-NEXT: s_ashr_i32 s1, s0, 31 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_srem24_k_den_i64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b32 s8, 0x46b6fe00 +; GCN-IR-NEXT: s_mov_b32 s4, 0x46b6fe00 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[2:3], 40 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s2 +; GCN-IR-NEXT: s_ashr_i32 s3, s2, 30 +; GCN-IR-NEXT: s_or_b32 s3, s3, 1 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: v_mul_f32_e32 v2, 0x38331158, v0 +; GCN-IR-NEXT: v_trunc_f32_e32 v2, v2 +; GCN-IR-NEXT: v_mad_f32 v0, -v2, s4, v0 +; GCN-IR-NEXT: 
v_cvt_i32_f32_e32 v2, v2 +; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 +; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-IR-NEXT: s_mov_b32 s4, s0 -; GCN-IR-NEXT: s_ashr_i32 s0, s2, 30 ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_mul_f32_e32 v1, 0x38331158, v0 -; GCN-IR-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-IR-NEXT: v_mad_f32 v0, -v1, s8, v0 -; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 -; GCN-IR-NEXT: s_or_b32 s3, s0, 1 -; GCN-IR-NEXT: v_cmp_ge_f32_e64 s[0:1], |v0|, s8 -; GCN-IR-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-IR-NEXT: s_cselect_b32 s0, s3, 0 -; GCN-IR-NEXT: v_readfirstlane_b32 s1, v1 -; GCN-IR-NEXT: s_add_i32 s0, s1, s0 -; GCN-IR-NEXT: s_mulk_i32 s0, 0x5b7f -; GCN-IR-NEXT: s_sub_i32 s0, s2, s0 -; GCN-IR-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GCN-IR-NEXT: s_ashr_i32 s1, s0, 31 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s1 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %x.shr = ashr i64 %x, 40 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll index 7cc2b8214a36d..2ed121b159494 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; Make sure the stack is never realigned for entry functions. @@ -315,3 +315,6 @@ define amdgpu_kernel void @alignstack_attr() #2 { attributes #0 = { nounwind } attributes #1 = { nounwind "stackrealign" } attributes #2 = { nounwind alignstack=128 } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 300} diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 9f5658442de3d..f8784deee6582 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -156,12 +156,13 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; The BP value is saved/restored with a VGPR spill. 
; GCN-LABEL: func_call_align1024_bp_gets_vgpr_spill: -; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN: v_writelane_b32 [[VGPR_REG]], s33, 2 +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 +; GCN-NEXT: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000 +; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s33 offset:1028 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec +; GCN: v_writelane_b32 [[VGPR_REG]], [[FP_SCRATCH_COPY]], 2 ; GCN: v_writelane_b32 [[VGPR_REG]], s34, 3 -; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 -; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000 ; GCN: s_mov_b32 s34, s32 ; GCN: v_writelane_b32 [[VGPR_REG]], s30, 0 ; GCN: s_add_i32 s32, s32, 0x30000 @@ -175,12 +176,13 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0 ; GCN: v_readlane_b32 s31, [[VGPR_REG]], 1 -; GCN: s_add_i32 s32, s32, 0xfffd0000 -; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 +; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s33 offset:1028 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_add_i32 s32, s32, 0xfffd0000 +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 s[30:31] %temp = alloca i32, align 1024, addrspace(5) store volatile i32 0, i32 addrspace(5)* %temp, align 1024 @@ -198,8 +200,8 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5 ; GCN-LABEL: needs_align1024_stack_args_used_inside_loop: ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 -; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34 ; GCN-NEXT: s_add_i32 s33, s32, 0xffc0 +; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34 ; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34 @@ -208,9 +210,9 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024 ; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen ; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]] -; GCN: s_add_i32 s32, s32, 0xfffd0000 +; GCN: s_mov_b32 s34, [[BP_COPY]] +; GCN-NEXT: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] -; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]] ; GCN-NEXT: s_setpc_b64 s[30:31] begin: %local_var = alloca i32, align 1024, addrspace(5) @@ -260,11 +262,12 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; If there are no free SGPRs or VGPRs available we must spill the BP to memory. 
; GCN-LABEL: no_free_regs_spill_bp_to_mem -; GCN: s_or_saveexec_b64 s[4:5], -1 -; GCN: v_mov_b32_e32 v0, s33 -; GCN: buffer_store_dword v0, off, s[0:3], s32 +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN: s_xor_saveexec_b64 s[6:7], -1 +; GCN: v_mov_b32_e32 v0, [[FP_SCRATCH_COPY]] +; GCN: buffer_store_dword v0, off, s[0:3], s33 ; GCN: v_mov_b32_e32 v0, s34 -; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 +; GCN-DAG: buffer_store_dword v0, off, s[0:3], s33 %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 @@ -293,18 +296,21 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; scratch VGPR to hold the offset. ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset -; GCN: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: s_add_i32 s6, s32, 0x42100 -; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, s33 +; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 +; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_add_i32 s5, s33, 0x42100 +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, [[FP_SCRATCH_COPY]] ; GCN-NOT: v_mov_b32_e32 v0, 0x1088 -; GCN-NEXT: s_add_i32 s6, s32, 0x42200 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GCN-NEXT: s_add_i32 s5, s33, 0x42200 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-NEXT: v_mov_b32_e32 v0, s34 ; GCN-NOT: v_mov_b32_e32 v0, 0x108c -; GCN-NEXT: s_add_i32 s6, s32, 0x42300 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GCN-NEXT: s_add_i32 s5, s33, 0x42300 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/swdev373493.ll b/llvm/test/CodeGen/AMDGPU/swdev373493.ll new file mode 100644 index 0000000000000..dea192aad80a7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/swdev373493.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck %s + +@global = external protected addrspace(4) externally_initialized global [4096 x i64], align 16 + +define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6) unnamed_addr align 2 { +; CHECK-LABEL: bar: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v15, v12 +; CHECK-NEXT: v_mov_b32_e32 v14, v11 +; CHECK-NEXT: v_mov_b32_e32 v13, v10 +; CHECK-NEXT: v_mov_b32_e32 v12, v9 +; CHECK-NEXT: v_mov_b32_e32 v11, v8 +; CHECK-NEXT: v_mov_b32_e32 v10, v7 +; CHECK-NEXT: v_mov_b32_e32 v9, v6 +; CHECK-NEXT: v_mov_b32_e32 v8, v5 +; CHECK-NEXT: v_mov_b32_e32 v7, v4 +; CHECK-NEXT: v_mov_b32_e32 v6, v3 +; CHECK-NEXT: s_cmp_lt_i32 s4, 3 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %LeafBlock +; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 +; CHECK-NEXT: ; %bb.2: ; %bb7 +; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, global@rel32@lo+1948 +; CHECK-NEXT: s_addc_u32 s17, s17, global@rel32@hi+1956 +; 
CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, s16 +; CHECK-NEXT: v_mov_b32_e32 v1, s17 +; CHECK-NEXT: s_getpc_b64 s[18:19] +; CHECK-NEXT: s_add_u32 s18, s18, eggs@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s19, s19, eggs@rel32@hi+12 +; CHECK-NEXT: s_setpc_b64 s[18:19] +; CHECK-NEXT: .LBB0_3: ; %LeafBlock1 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_5 +; CHECK-NEXT: ; %bb.4: ; %bb8 +; CHECK-NEXT: v_mov_b32_e32 v0, v1 +; CHECK-NEXT: v_mov_b32_e32 v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, v6 +; CHECK-NEXT: v_mov_b32_e32 v3, v7 +; CHECK-NEXT: v_mov_b32_e32 v4, v8 +; CHECK-NEXT: v_mov_b32_e32 v5, v9 +; CHECK-NEXT: v_mov_b32_e32 v6, v10 +; CHECK-NEXT: v_mov_b32_e32 v7, v11 +; CHECK-NEXT: v_mov_b32_e32 v8, v12 +; CHECK-NEXT: v_mov_b32_e32 v9, v13 +; CHECK-NEXT: v_mov_b32_e32 v10, v14 +; CHECK-NEXT: v_mov_b32_e32 v11, v15 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, quux@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, quux@rel32@hi+12 +; CHECK-NEXT: s_setpc_b64 s[16:17] +; CHECK-NEXT: .LBB0_5: ; %bb9 +; CHECK-NEXT: s_setpc_b64 s[30:31] +bb: + switch i32 undef, label %bb9 [ + i32 3, label %bb8 + i32 1, label %bb7 + ] + +bb7: ; preds = %bb + %tmp = load ptr, ptr undef, align 8 + tail call fastcc void @eggs(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @global, i64 0, i64 243) to ptr), ptr %tmp, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5) + br label %bb9 + +bb8: ; preds = %bb + tail call fastcc void @quux(ptr noundef nonnull align 8 dereferenceable(24) %arg1, ptr noundef nonnull align 8 dereferenceable(24) %arg2, ptr noundef %arg3, ptr noundef %arg4, ptr noundef %arg5, ptr noundef nonnull align 8 dereferenceable(8) %arg6) + br label %bb9 + +bb9: ; preds = %bb8, %bb7, %bb + ret void +} + +declare dso_local fastcc void @eggs(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2 + +declare dso_local fastcc void @quux(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr align 2 diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll index 0c1c31c603fee..90963fc691427 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -17,11 +17,11 @@ define amdgpu_gfx float @caller(float %arg0) { ; GCN-LABEL: caller: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_writelane_b32 v1, s33, 3 +; GCN-NEXT: s_mov_b32 s36, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: v_writelane_b32 v1, s30, 1 @@ -35,11 +35,11 @@ define amdgpu_gfx float @caller(float %arg0) { ; GCN-NEXT: v_readlane_b32 s30, v1, 1 ; GCN-NEXT: v_readlane_b32 s31, v1, 2 ; GCN-NEXT: v_readlane_b32 s4, v1, 0 -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v1, 3 -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_addk_i32 
s32, 0xfc00 +; GCN-NEXT: s_mov_b32 s33, s36 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %add = fadd float %arg0, 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll b/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll index 9d060759a84be..adf8dc9baec44 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll @@ -1,9 +1,12 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s +; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s +; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s ; GFX90C-VALID: .hsa_code_object_isa 9,0,12,"AMD","AMDGPU" ; GFX90C-VALID: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx90c" ; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY ; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940 + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll index 9c166b7cc61ba..e81286bd7b52a 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x12C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -28,3 +28,6 @@ define void @func2() { entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll index f1d98b6e462fb..e4e5a5e00f2b5 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 
--amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) ; ELF-NEXT: ] @@ -27,3 +27,6 @@ define void @func2() { entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll index c32eb66b7deb9..4eb827cdf10eb 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,5 @@ entry: } attributes #0 = { "target-features"="-xnack" } +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll index a15b005ca79fa..d74d2a8ce0351 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,6 @@ entry: } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll 
b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll index f77949204942c..a479eed80aae7 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,6 @@ entry: } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll index 042254a03ec54..44fa2a02803c6 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,6 @@ entry: } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll index d5f6135994d36..e4c3388859a09 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 
'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,6 @@ entry: } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll index d0df9e463e063..d4f895b2d8fa1 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -30,3 +30,6 @@ entry: } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll index 0d24aa8f848d3..ec812689ac3e0 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s 2>&1 | FileCheck --check-prefixes=ERR %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck --check-prefixes=ERR %s ; ERR: error: xnack setting of 'func2' function does not match module xnack setting @@ -19,3 +19,6 @@ entry: attributes #0 = { "target-features"="-xnack" } attributes #1 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll index 90d56dd1d6c14..f58cd8f8c7ae1 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 @@ -8,7 +8,7 
@@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x12C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -18,3 +18,6 @@ define void @func0() { entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll index 7d2969b0a8054..1b33abe3074da 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" ; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22) ; ELF-NEXT: ] @@ -17,3 +17,6 @@ define void @func0() { entry: ret void } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll index 9758d2e8e8a17..7ec3fe4896f51 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x22C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -20,3 +20,6 @@ entry: } attributes #0 = { "target-features"="-xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll index 6474971e67534..f04f110618e57 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=ASM %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=obj < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF %s ; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" ; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' @@ -8,7 +8,7 @@ ; ASM: - 1 ; ELF: OS/ABI: AMDGPU_HSA (0x40) -; ELF: ABIVersion: 2 +; ELF: ABIVersion: 3 ; ELF: Flags [ (0x32C) ; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300) ; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C) @@ -20,3 +20,6 @@ entry: } attributes #0 = { "target-features"="+xnack" } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir new file mode 100644 index 0000000000000..d5276214d4a79 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# VGPR spill code in the prolog sets the `IsKill` flag for the spilled register +# based on the entry block's livein info. + +--- +name: vgpr_use_after_prolog_spill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + + ; GCN-LABEL: name: vgpr_use_after_prolog_spill + ; GCN: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... 
+ +--- +name: livein_vgpr_def_after_prolog_spill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; GCN-LABEL: name: livein_vgpr_def_after_prolog_spill + ; GCN: liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... + +--- +name: livein_vgpr_not_referred_in_entry_bb +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: livein_vgpr_not_referred_in_entry_bb + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + bb.0: + liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + S_NOP 0 + S_BRANCH %bb.1 + bb.1: + 
liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll index a64507ca5861c..15445dafaefcd 100644 --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=3 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V2 %s +; RUN: sed 
's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/200/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V2 %s +; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 @@ -1167,3 +1167,6 @@ define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll index 64f4064610468..e4cd18a0f424b 100644 --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -1,20 +1,29 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; enable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s -; RUN: llc 
-mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s ; disable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s -; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported declare void @llvm.trap() #0 @@ -41,11 +50,11 @@ declare void @llvm.debugtrap() #1 ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -69,10 +78,10 @@ define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) ; TRAP-BIT: enable_trap_handler = 1 ; 
NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.debugtrap() - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -82,11 +91,11 @@ define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %a ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -97,9 +106,9 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { ; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP-NEXT: s_trap 2 -define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { +define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { entry: - %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %tmp29 = load volatile i32, ptr addrspace(1) %arg0 %cmp = icmp eq i32 %tmp29, -1 br i1 %cmp, label %ret, label %trap @@ -108,9 +117,12 @@ trap: unreachable ret: - store volatile i32 3, i32 addrspace(1)* %arg0 + store volatile i32 3, ptr addrspace(1) %arg0 ret void } attributes #0 = { nounwind noreturn } attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 200} diff --git a/llvm/test/CodeGen/AMDGPU/trunc.ll b/llvm/test/CodeGen/AMDGPU/trunc.ll index f5401a1a254db..922d9e5f4eaa4 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc.ll @@ -96,7 +96,9 @@ define amdgpu_kernel void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) ; SI: s_load_dwordx2 s[[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13 ; VI: s_load_dwordx2 s[[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4c ; GCN: s_bitcmp1_b32 s[[SLO]], 0 -; GCN: s_cselect_b32 {{s[0-9]+}}, 63, -12 +; SI: s_cselect_b64 s[[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], -1, 0 +; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s[[[VLO]]:[[VHI]]] +; VI: s_cselect_b32 {{s[0-9]+}}, 63, -12 define amdgpu_kernel void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, [8 x i32], i64 %x) { %trunc = trunc i64 %x to i1 %sel = select i1 %trunc, i32 63, i32 -12 diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index daa9a90ad54d3..e320f69344c65 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -8,11 +8,12 @@ define internal fastcc void @widget() { ; GFX90A-LABEL: widget: ; GFX90A: ; %bb.0: ; %bb ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX90A-NEXT: s_mov_b64 exec, s[16:17] -; GFX90A-NEXT: v_writelane_b32 v40, s33, 2 +; GFX90A-NEXT: s_mov_b32 s16, s33 ; GFX90A-NEXT: s_mov_b32 s33, s32 +; GFX90A-NEXT: 
s_or_saveexec_b64 s[18:19], -1 +; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-NEXT: v_writelane_b32 v40, s16, 2 ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 @@ -30,67 +31,57 @@ bb: define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, i32 %tmp5.i.i, i32 %tmp427.i, i1 %tmp438.i, double %tmp27.i, i1 %tmp48.i) { ; GLOBALNESS1-LABEL: kernel: ; GLOBALNESS1: ; %bb.0: ; %bb -; GLOBALNESS1-NEXT: s_mov_b64 s[54:55], s[6:7] -; GLOBALNESS1-NEXT: s_load_dwordx4 s[36:39], s[8:9], 0x0 -; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, v0 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v44, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[6:7] +; GLOBALNESS1-NEXT: s_load_dwordx4 s[84:87], s[6:7], 0x0 +; GLOBALNESS1-NEXT: s_nop 0 +; GLOBALNESS1-NEXT: s_load_dword s6, s[6:7], 0x14 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: global_store_dword v[0:1], v44, off +; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: global_load_dword v0, v44, s[36:37] -; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GLOBALNESS1-NEXT: s_mov_b64 s[64:65], s[4:5] -; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 -; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20 -; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS1-NEXT: global_load_dword v0, v42, s[84:85] +; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s15 ; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, 0x40994400 -; GLOBALNESS1-NEXT: s_bitcmp1_b32 s38, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[4:5] +; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[38:39], 0x18 +; GLOBALNESS1-NEXT: s_load_dword s7, s[38:39], 0x20 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s86, 0 +; GLOBALNESS1-NEXT: s_cselect_b64 s[90:91], -1, 0 +; GLOBALNESS1-NEXT: s_xor_b64 s[92:93], s[90:91], -1 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, 0x40994400 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[36:37], s[4:5], v[44:45] -; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], 0 +; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43] +; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[88:89], s[4:5], 0 ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[94:95], s[4:5], -1 -; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] -; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GLOBALNESS1-NEXT: s_xor_b64 s[88:89], s[4:5], -1 ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s7, 0 ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7] ; GLOBALNESS1-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 -; GLOBALNESS1-NEXT: s_xor_b64 s[86:87], s[4:5], -1 -; GLOBALNESS1-NEXT: s_load_dwordx2 s[66:67], s[6:7], 0x0 -; GLOBALNESS1-NEXT: s_mov_b32 s98, s16 -; GLOBALNESS1-NEXT: s_mov_b64 s[62:63], s[8:9] -; GLOBALNESS1-NEXT: s_mov_b32 s99, s15 -; GLOBALNESS1-NEXT: s_mov_b32 s100, s14 -; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11] 
-; GLOBALNESS1-NEXT: s_mov_b64 s[92:93], 0x80 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v1 -; GLOBALNESS1-NEXT: s_mov_b32 s69, 0x3ff00000 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[98:99], s[6:7], 0x0 +; GLOBALNESS1-NEXT: s_mov_b32 s58, s14 +; GLOBALNESS1-NEXT: s_mov_b32 s59, s13 +; GLOBALNESS1-NEXT: s_mov_b32 s60, s12 +; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[8:9] +; GLOBALNESS1-NEXT: s_mov_b64 s[62:63], 0x80 +; GLOBALNESS1-NEXT: s_xor_b64 s[96:97], s[4:5], -1 +; GLOBALNESS1-NEXT: s_mov_b32 s65, 0x3ff00000 ; GLOBALNESS1-NEXT: s_mov_b32 s32, 0 ; GLOBALNESS1-NEXT: ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 0 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 1 -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 2 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 3 -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 4 -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[90:91], 1, v0 -; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 5 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[42:43], 0, v0 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[44:45], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[48:49], 0, v0 ; GLOBALNESS1-NEXT: s_branch .LBB1_4 ; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 4 -; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 5 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[48:49] ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow6 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -136,32 +127,31 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_15 Depth 2 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[62:63], s[62:63] op_sel:[0,1] ; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] -; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40 -; GLOBALNESS1-NEXT: buffer_store_dword v44, off, s[0:3], 0 -; GLOBALNESS1-NEXT: flat_load_dword v43, v[0:1] -; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 +; GLOBALNESS1-NEXT: flat_load_dword v44, v[0:1] +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s60 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s59 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s58 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67] -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[42:43] +; GLOBALNESS1-NEXT: s_swappc_b64 
s[30:31], s[98:99] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[90:91] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_8 ; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lt_i32 s39, 1 +; GLOBALNESS1-NEXT: s_cmp_lt_i32 s87, 1 ; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock3 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s39, 1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s87, 1 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: s_cbranch_execnz .LBB1_8 @@ -178,131 +168,125 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS1-NEXT: flat_load_dword v0, v[0:1] -; GLOBALNESS1-NEXT: s_mov_b32 s68, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s70, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s71, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s72, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s73, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s74, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s75, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s76, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s77, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s78, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s79, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s80, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s81, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s82, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s83, s69 -; GLOBALNESS1-NEXT: s_mov_b32 s84, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s85, s69 +; GLOBALNESS1-NEXT: s_mov_b32 s64, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s66, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s67, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s68, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s65 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s65 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[96:97], 0, v0 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], 
s[96:97], s[96:97] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[70:71], s[96:97] +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[50:51], 0, v0 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[64:65], s[64:65] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[66:67], s[50:51] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS1-NEXT: ; %bb.10: ; %bb33.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS1-NEXT: v_readlane_b32 s4, v41, 0 -; GLOBALNESS1-NEXT: v_readlane_b32 s5, v41, 1 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[42:43] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_12 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb39.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[44:45], off +; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS1-NEXT: .LBB1_12: ; %bb44.lr.ph.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v43 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v44 ; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc -; GLOBALNESS1-NEXT: s_mov_b64 s[72:73], s[42:43] -; GLOBALNESS1-NEXT: s_mov_b32 s75, s39 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1] -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2 +; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e64 s[52:53], 0, v[0:1] +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[54:55], 0, v2 ; GLOBALNESS1-NEXT: s_branch .LBB1_15 ; GLOBALNESS1-NEXT: .LBB1_13: ; %Flow7 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS1-NEXT: .LBB1_14: ; %bb63.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[86:87] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[96:97] ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS1-NEXT: .LBB1_15: ; %bb44.i ; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[94:95] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, 
s[92:93] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.16: ; %bb46.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[88:89] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[94:95] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.17: ; %bb50.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[36:37] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[40:41] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_20 ; GLOBALNESS1-NEXT: ; %bb.18: ; %bb3.i.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[40:41] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[88:89] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_20 ; GLOBALNESS1-NEXT: ; %bb.19: ; %bb6.i.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[56:57] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[52:53] ; GLOBALNESS1-NEXT: .LBB1_20: ; %spam.exit.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[90:91] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[44:45] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.21: ; %bb55.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: s_add_u32 s60, s62, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s61, s63, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61] +; GLOBALNESS1-NEXT: s_add_u32 s56, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s57, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[56:57] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s60 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s59 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s58 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[98:99] ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[56:57] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s60 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s59 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s58 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], a[32:33], off -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[66:67] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[98:99] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[54:55] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_13 ; GLOBALNESS1-NEXT: ; %bb.22: ; %bb62.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS1-NEXT: global_store_dwordx2 
v[0:1], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_13 ; GLOBALNESS1-NEXT: .LBB1_23: ; %LeafBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s39, 0 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s87, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] @@ -313,170 +297,150 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS1-NEXT: s_branch .LBB1_3 ; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow14 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] -; GLOBALNESS1-NEXT: s_mov_b32 s36, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s37, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s38, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s39, s93 -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[40:41] -; GLOBALNESS1-NEXT: s_mov_b32 s40, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s41, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s42, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s43, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s44, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s45, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s46, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s47, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s48, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s49, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s50, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s51, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s52, s93 -; GLOBALNESS1-NEXT: s_mov_b32 s53, s93 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[56:57], s[56:57] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1] -; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[6:7] -; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[4:5] -; GLOBALNESS1-NEXT: s_mov_b32 s39, s75 -; GLOBALNESS1-NEXT: s_mov_b64 s[42:43], s[72:73] +; GLOBALNESS1-NEXT: s_mov_b32 s68, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s82, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s83, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s84, s63 +; GLOBALNESS1-NEXT: s_mov_b32 s85, s63 +; GLOBALNESS1-NEXT: 
v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] ; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow15 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[70:71] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[66:67] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[50:51] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 2 -; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 3 -; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[46:47] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 ; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_1 ; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_2 ; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32 ; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] -; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 -; GLOBALNESS1-NEXT: s_addc_u32 
s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s60 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s59 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s58 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS1-NEXT: s_add_u32 s6, s6, widget@rel32@lo+4 +; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, widget@rel32@hi+12 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34 ; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s62, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s63, 0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[64:65] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] -; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 -; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s60 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s59 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s58 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS1-NEXT: s_add_u32 s6, s6, widget@rel32@lo+4 +; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, widget@rel32@hi+12 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock ; ; GLOBALNESS0-LABEL: kernel: ; GLOBALNESS0: ; %bb.0: ; %bb -; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[6:7] -; GLOBALNESS0-NEXT: s_load_dwordx4 s[36:39], s[8:9], 0x0 -; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, v0 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v44, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[6:7] +; GLOBALNESS0-NEXT: s_load_dwordx4 s[60:63], s[6:7], 0x0 +; GLOBALNESS0-NEXT: s_nop 0 +; GLOBALNESS0-NEXT: s_load_dword s6, s[6:7], 0x14 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: global_store_dword v[0:1], v44, off +; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: global_load_dword v0, v44, s[36:37] -; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 -; GLOBALNESS0-NEXT: s_mov_b64 s[62:63], s[4:5] -; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 -; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20 -; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 -; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS0-NEXT: global_load_dword v0, v42, s[60:61] +; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s10, s15 +; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s15 ; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, 0x40994400 -; GLOBALNESS0-NEXT: s_bitcmp1_b32 s38, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[4:5] +; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[38:39], 0x18 +; GLOBALNESS0-NEXT: s_load_dword s7, s[38:39], 0x20 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s62, 0 +; GLOBALNESS0-NEXT: 
s_cselect_b64 s[90:91], -1, 0 +; GLOBALNESS0-NEXT: s_xor_b64 s[92:93], s[90:91], -1 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, 0x40994400 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[36:37], s[4:5], v[44:45] -; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], 0 +; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43] +; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[88:89], s[4:5], 0 ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[94:95], s[4:5], -1 -; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] -; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GLOBALNESS0-NEXT: s_xor_b64 s[88:89], s[4:5], -1 ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s7, 0 ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7] ; GLOBALNESS0-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 -; GLOBALNESS0-NEXT: s_xor_b64 s[86:87], s[4:5], -1 -; GLOBALNESS0-NEXT: s_load_dwordx2 s[66:67], s[6:7], 0x0 -; GLOBALNESS0-NEXT: s_mov_b32 s98, s16 -; GLOBALNESS0-NEXT: s_mov_b64 s[60:61], s[8:9] -; GLOBALNESS0-NEXT: s_mov_b32 s99, s15 -; GLOBALNESS0-NEXT: s_mov_b32 s100, s14 -; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11] -; GLOBALNESS0-NEXT: s_mov_b64 s[92:93], 0x80 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v1 -; GLOBALNESS0-NEXT: s_mov_b32 s69, 0x3ff00000 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[98:99], s[6:7], 0x0 +; GLOBALNESS0-NEXT: s_mov_b32 s56, s14 +; GLOBALNESS0-NEXT: s_mov_b32 s57, s13 +; GLOBALNESS0-NEXT: s_mov_b32 s58, s12 +; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[8:9] +; GLOBALNESS0-NEXT: s_mov_b64 s[86:87], 0x80 +; GLOBALNESS0-NEXT: s_xor_b64 s[96:97], s[4:5], -1 +; GLOBALNESS0-NEXT: s_mov_b32 s65, 0x3ff00000 ; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 ; GLOBALNESS0-NEXT: ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 0 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 1 -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 2 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 3 -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 4 -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[90:91], 1, v0 -; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 5 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[42:43], 0, v0 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[44:45], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[48:49], 0, v0 ; GLOBALNESS0-NEXT: s_branch .LBB1_4 ; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 4 -; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 5 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[48:49] ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow6 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -522,32 +486,31 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS0-NEXT: ; =>This Loop Header: 
Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_15 Depth 2 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[86:87], s[86:87] op_sel:[0,1] ; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] -; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40 -; GLOBALNESS0-NEXT: buffer_store_dword v44, off, s[0:3], 0 -; GLOBALNESS0-NEXT: flat_load_dword v43, v[0:1] -; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 +; GLOBALNESS0-NEXT: flat_load_dword v44, v[0:1] +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s58 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s57 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s56 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67] -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[42:43] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[98:99] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[90:91] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_8 ; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lt_i32 s39, 1 +; GLOBALNESS0-NEXT: s_cmp_lt_i32 s63, 1 ; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock3 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s39, 1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s63, 1 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: s_cbranch_execnz .LBB1_8 @@ -564,131 +527,125 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS0-NEXT: flat_load_dword v0, v[0:1] -; GLOBALNESS0-NEXT: s_mov_b32 s68, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s70, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s71, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s72, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s73, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s74, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s75, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s76, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s77, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s78, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s79, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s80, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s81, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s82, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s83, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s84, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s85, s69 +; GLOBALNESS0-NEXT: s_mov_b32 s64, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s66, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s67, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s68, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s69, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s72, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s73, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s74, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s75, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s77, s65 +; GLOBALNESS0-NEXT: 
s_mov_b32 s78, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s80, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s81, s65 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[96:97], 0, v0 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[70:71], s[96:97] +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[50:51], 0, v0 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[64:65], s[64:65] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[66:67], s[50:51] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS0-NEXT: ; %bb.10: ; %bb33.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS0-NEXT: v_readlane_b32 s4, v41, 0 -; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 1 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[42:43] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_12 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb39.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[44:45], off +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, 
v42 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS0-NEXT: .LBB1_12: ; %bb44.lr.ph.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v43 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v44 ; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc -; GLOBALNESS0-NEXT: s_mov_b64 s[72:73], s[42:43] -; GLOBALNESS0-NEXT: s_mov_b32 s75, s39 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1] -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2 +; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e64 s[52:53], 0, v[0:1] +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[54:55], 0, v2 ; GLOBALNESS0-NEXT: s_branch .LBB1_15 ; GLOBALNESS0-NEXT: .LBB1_13: ; %Flow7 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS0-NEXT: .LBB1_14: ; %bb63.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[86:87] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[96:97] ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS0-NEXT: .LBB1_15: ; %bb44.i ; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[94:95] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[92:93] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.16: ; %bb46.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[88:89] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[94:95] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.17: ; %bb50.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[36:37] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[40:41] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_20 ; GLOBALNESS0-NEXT: ; %bb.18: ; %bb3.i.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[40:41] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[88:89] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_20 ; GLOBALNESS0-NEXT: ; %bb.19: ; %bb6.i.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[56:57] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[52:53] ; GLOBALNESS0-NEXT: .LBB1_20: ; %spam.exit.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[90:91] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[44:45] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.21: ; %bb55.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: s_add_u32 s64, s60, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s65, s61, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55] -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65] +; GLOBALNESS0-NEXT: s_add_u32 s60, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s61, s39, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[60:61] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67] +; GLOBALNESS0-NEXT: s_mov_b32 s12, s58 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s57 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s56 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: s_swappc_b64 
s[30:31], s[98:99] ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55] -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[60:61] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s58 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s57 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s56 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], a[32:33], off -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[66:67] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[98:99] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[54:55] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_13 ; GLOBALNESS0-NEXT: ; %bb.22: ; %bb62.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_15 Depth=2 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_13 ; GLOBALNESS0-NEXT: .LBB1_23: ; %LeafBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s39, 0 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s63, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] @@ -699,105 +656,95 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, ; GLOBALNESS0-NEXT: s_branch .LBB1_3 ; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow14 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] -; GLOBALNESS0-NEXT: s_mov_b32 s36, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s37, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s38, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s39, s93 -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[40:41] -; GLOBALNESS0-NEXT: s_mov_b32 s40, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s41, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s42, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s43, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s44, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s45, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s46, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s47, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s48, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s49, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s50, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s51, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s52, s93 -; GLOBALNESS0-NEXT: s_mov_b32 s53, s93 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 
v[20:21], s[56:57], s[56:57] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1] -; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[6:7] -; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[4:5] -; GLOBALNESS0-NEXT: s_mov_b32 s39, s75 -; GLOBALNESS0-NEXT: s_mov_b64 s[42:43], s[72:73] +; GLOBALNESS0-NEXT: s_mov_b32 s68, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s69, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s72, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s73, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s74, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s75, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s77, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s78, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s80, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s81, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s82, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s83, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s84, s87 +; GLOBALNESS0-NEXT: s_mov_b32 s85, s87 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] ; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow15 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[70:71] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[66:67] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[50:51] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 2 -; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 3 -; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[46:47] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 ; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS0-NEXT: 
global_store_dwordx2 v[32:33], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_1 ; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_2 ; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32 ; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] -; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 -; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS0-NEXT: s_mov_b32 s12, s58 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s57 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s56 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS0-NEXT: s_add_u32 s6, s6, widget@rel32@lo+4 +; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, widget@rel32@hi+12 +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34 ; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s60, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s61, 0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[62:63] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[54:55] +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s100 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s99 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s98 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v42 -; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] -; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 -; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS0-NEXT: s_mov_b32 s12, s58 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s57 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s56 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 +; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS0-NEXT: s_add_u32 s6, s6, widget@rel32@lo+4 +; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, widget@rel32@hi+12 +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock bb: store i32 0, i32 addrspace(1)* null, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll index e832dc7614d3e..53353cad8fb7f 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -7,18 +7,18 @@ define amdgpu_ps <3 x i32> 
@s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY killed $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed $sgpr1 - ; CHECK-NEXT: undef %0.sub0:sreg_64 = COPY killed [[COPY]] - ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]] + ; CHECK-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY killed $sgpr0 + ; CHECK-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY killed $sgpr1 + ; CHECK-NEXT: undef %0.sub0:sreg_64 = PRED_COPY killed [[PRED_COPY]] + ; CHECK-NEXT: %0.sub1:sreg_64 = PRED_COPY killed [[PRED_COPY1]] ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (invariant load (<2 x s32>) from %ir.ptr, align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (invariant load (s32) from %ir.ptr + 8, addrspace 4) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 - ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 - ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] + ; CHECK-NEXT: [[PRED_COPY2:%[0-9]+]]:sreg_32 = PRED_COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: $sgpr0 = PRED_COPY killed [[PRED_COPY2]] + ; CHECK-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_32 = PRED_COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: $sgpr1 = PRED_COPY killed [[PRED_COPY3]] + ; CHECK-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY killed [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: $sgpr2 = PRED_COPY killed [[PRED_COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir index 7757c758134c7..e0d0684a06366 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -16,7 +16,7 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec @@ -37,7 +37,7 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec @@ -124,7 +124,7 @@ body: | ... 
# GCN-LABEL: name: test_fmaak_other_constantlike_src0_f32 -# GCN: %2:vgpr_32 = COPY %1 +# GCN: %2:vgpr_32 = PRED_COPY %1 # GCN: %2:vgpr_32 = V_FMAC_F32_e32 %stack.0, %0, %2, implicit $mode, implicit $exec --- name: test_fmaak_other_constantlike_src0_f32 @@ -166,7 +166,7 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 %0.sub0, %2, %1, implicit $mode, implicit $exec %4 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec @@ -187,7 +187,7 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 + %1 = PRED_COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec S_NOP 0, implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll index 889c3fc8b9d0c..0c9bf2ac2f768 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -31,11 +31,11 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; SI-NEXT: v_mul_hi_u32 v2, v0, v2 ; SI-NEXT: v_mul_lo_u32 v3, v2, v1 ; SI-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; SI-NEXT: v_subrev_i32_e32 v0, vcc, v3, v0 +; SI-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v1 +; SI-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] ; SI-NEXT: v_subrev_i32_e32 v3, vcc, v1, v0 -; SI-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 -; SI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; SI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; SI-NEXT: v_add_i32_e32 v3, vcc, 1, v2 ; SI-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc @@ -67,11 +67,11 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; VI-NEXT: v_mul_hi_u32 v2, v0, v2 ; VI-NEXT: v_mul_lo_u32 v3, v2, v1 ; VI-NEXT: v_add_u32_e32 v4, vcc, 1, v2 -; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 +; VI-NEXT: v_subrev_u32_e32 v0, vcc, v3, v0 +; VI-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v1 +; VI-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] ; VI-NEXT: v_subrev_u32_e32 v3, vcc, v1, v0 -; VI-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 -; VI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; VI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; VI-NEXT: v_add_u32_e32 v3, vcc, 1, v2 ; VI-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc @@ -99,11 +99,11 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; GCN-NEXT: v_mov_b32_e32 v3, s1 ; GCN-NEXT: v_mul_lo_u32 v5, v4, v1 ; GCN-NEXT: v_add_u32_e32 v6, vcc, 1, v4 -; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v5 +; GCN-NEXT: v_subrev_u32_e32 v0, vcc, v5, v0 +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] ; GCN-NEXT: v_subrev_u32_e32 v5, vcc, v1, v0 -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1] ; GCN-NEXT: v_add_u32_e32 v5, vcc, 1, v4 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 ; GCN-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc @@ -117,29 +117,25 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] ; GFX1030-NEXT: s_waitcnt vmcnt(0) -; GFX1030-NEXT: v_readfirstlane_b32 s2, v1 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v0 -; 
GFX1030-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX1030-NEXT: s_sub_i32 s4, 0, s2 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX1030-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX1030-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1030-NEXT: s_mul_i32 s4, s4, s3 -; GFX1030-NEXT: s_mul_hi_u32 s4, s3, s4 -; GFX1030-NEXT: s_add_i32 s3, s3, s4 -; GFX1030-NEXT: s_mul_hi_u32 s3, s5, s3 -; GFX1030-NEXT: s_mul_i32 s4, s3, s2 -; GFX1030-NEXT: s_sub_i32 s4, s5, s4 -; GFX1030-NEXT: s_add_i32 s5, s3, 1 -; GFX1030-NEXT: s_sub_i32 s6, s4, s2 -; GFX1030-NEXT: s_cmp_ge_u32 s4, s2 -; GFX1030-NEXT: s_cselect_b32 s3, s5, s3 -; GFX1030-NEXT: s_cselect_b32 s4, s6, s4 -; GFX1030-NEXT: s_add_i32 s5, s3, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s4, s2 -; GFX1030-NEXT: s_cselect_b32 s2, s5, s3 -; GFX1030-NEXT: v_mov_b32_e32 v0, s2 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v4, 0, v1 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX1030-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX1030-NEXT: v_mul_lo_u32 v4, v4, v3 +; GFX1030-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX1030-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX1030-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX1030-NEXT: v_mul_lo_u32 v4, v3, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v0, v0, v4 +; GFX1030-NEXT: v_add_nc_u32_e32 v4, 1, v3 +; GFX1030-NEXT: v_sub_nc_u32_e32 v5, v0, v1 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo +; GFX1030-NEXT: v_add_nc_u32_e32 v4, 1, v3 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc_lo ; GFX1030-NEXT: global_store_dword v2, v0, s[0:1] ; GFX1030-NEXT: s_endpgm ; @@ -198,21 +194,18 @@ define amdgpu_kernel void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ; SI-NEXT: v_mul_lo_u32 v1, s4, v0 ; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: v_mul_hi_u32 v1, v0, v1 -; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; SI-NEXT: v_mul_hi_u32 v0, s2, v0 -; SI-NEXT: v_readfirstlane_b32 s0, v0 -; SI-NEXT: s_mul_i32 s0, s0, s3 -; SI-NEXT: s_sub_i32 s0, s2, s0 -; SI-NEXT: s_sub_i32 s1, s0, s3 -; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; SI-NEXT: s_cmp_ge_u32 s0, s3 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-NEXT: s_cselect_b32 s0, s1, s0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; SI-NEXT: s_cmp_ge_u32 s0, s3 -; SI-NEXT: s_cselect_b64 vcc, -1, 0 -; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mul_lo_u32 v1, v0, s3 +; SI-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; SI-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; SI-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 +; SI-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; SI-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; SI-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -233,56 +226,51 @@ define amdgpu_kernel void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ; VI-NEXT: v_mul_hi_u32 v1, v0, v1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0 ; VI-NEXT: v_mul_hi_u32 v0, s2, v0 -; VI-NEXT: v_readfirstlane_b32 s0, v0 -; VI-NEXT: s_mul_i32 s0, s0, s3 -; VI-NEXT: s_sub_i32 s0, s2, s0 -; VI-NEXT: s_sub_i32 s1, s0, s3 -; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 -; VI-NEXT: s_cmp_ge_u32 s0, s3 -; VI-NEXT: s_cselect_b64 vcc, 
-1, 0 -; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; VI-NEXT: s_cselect_b32 s0, s1, s0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 -; VI-NEXT: s_cmp_ge_u32 s0, s3 -; VI-NEXT: s_cselect_b64 vcc, -1, 0 -; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: v_mul_lo_u32 v1, v0, s3 +; VI-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; VI-NEXT: v_sub_u32_e32 v1, vcc, s2, v1 +; VI-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 +; VI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; VI-NEXT: v_subrev_u32_e32 v2, vcc, s3, v1 +; VI-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; VI-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; VI-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GCN-LABEL: s_udiv_i32: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GCN-NEXT: s_sub_i32 s4, 0, s3 +; GCN-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GCN-NEXT: s_sub_i32 s0, 0, s7 ; GCN-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GCN-NEXT: v_mul_lo_u32 v1, s4, v0 +; GCN-NEXT: v_mul_lo_u32 v1, s0, v0 ; GCN-NEXT: v_mul_hi_u32 v1, v0, v1 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v1, v0 -; GCN-NEXT: v_mul_hi_u32 v0, s2, v0 -; GCN-NEXT: v_readfirstlane_b32 s4, v0 -; GCN-NEXT: s_mul_i32 s4, s4, s3 -; GCN-NEXT: s_sub_i32 s2, s2, s4 -; GCN-NEXT: s_sub_i32 s4, s2, s3 -; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0 -; GCN-NEXT: s_cmp_ge_u32 s2, s3 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: s_cselect_b32 s2, s4, s2 -; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0 -; GCN-NEXT: s_cmp_ge_u32 s2, s3 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_mul_hi_u32 v0, s6, v0 +; GCN-NEXT: v_mul_lo_u32 v1, v0, s7 +; GCN-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GCN-NEXT: v_sub_u32_e32 v1, vcc, s6, v1 +; GCN-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v1 +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: v_subrev_u32_e32 v2, vcc, s7, v1 +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GCN-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm ; ; GFX1030-LABEL: s_udiv_i32: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX1030-NEXT: v_mov_b32_e32 v3, 0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_cvt_f32_u32_e32 v0, s3 ; GFX1030-NEXT: s_sub_i32 s5, 0, s3 @@ -290,23 +278,24 @@ define amdgpu_kernel void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ; GFX1030-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX1030-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1030-NEXT: v_mov_b32_e32 v0, 0 ; GFX1030-NEXT: s_mul_i32 s5, s5, s4 ; GFX1030-NEXT: s_mul_hi_u32 s5, s4, s5 ; GFX1030-NEXT: s_add_i32 s4, s4, s5 ; GFX1030-NEXT: s_mul_hi_u32 s4, s2, s4 ; GFX1030-NEXT: s_mul_i32 s5, s4, s3 ; GFX1030-NEXT: s_sub_i32 s2, s2, s5 -; GFX1030-NEXT: s_add_i32 s5, s4, 1 -; GFX1030-NEXT: s_sub_i32 s6, s2, s3 ; GFX1030-NEXT: s_cmp_ge_u32 s2, s3 -; GFX1030-NEXT: s_cselect_b32 s4, s5, s4 -; GFX1030-NEXT: s_cselect_b32 s2, s6, s2 +; GFX1030-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; 
GFX1030-NEXT: s_add_i32 s5, s4, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s2, s3 -; GFX1030-NEXT: s_cselect_b32 s2, s5, s4 -; GFX1030-NEXT: v_mov_b32_e32 v1, s2 -; GFX1030-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1030-NEXT: v_mov_b32_e32 v0, s5 +; GFX1030-NEXT: s_sub_i32 s5, s2, s3 +; GFX1030-NEXT: v_mov_b32_e32 v1, s5 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, s4, v0, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX1030-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX1030-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1030-NEXT: global_store_dword v3, v0, s[0:1] ; GFX1030-NEXT: s_endpgm ; ; EG-LABEL: s_udiv_i32: @@ -373,21 +362,21 @@ define amdgpu_kernel void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; SI-NEXT: v_mul_hi_u32 v6, v4, v6 ; SI-NEXT: v_mul_hi_u32 v7, v5, v7 ; SI-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; SI-NEXT: v_mul_hi_u32 v4, v0, v4 ; SI-NEXT: v_mul_hi_u32 v5, v1, v5 ; SI-NEXT: v_mul_lo_u32 v6, v4, v2 ; SI-NEXT: v_mul_lo_u32 v8, v5, v3 ; SI-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; SI-NEXT: v_subrev_i32_e32 v0, vcc, v6, v0 ; SI-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 ; SI-NEXT: v_add_i32_e32 v9, vcc, 1, v5 ; SI-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v2 ; SI-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v3 -; SI-NEXT: v_subrev_i32_e32 v6, vcc, v2, v0 ; SI-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; SI-NEXT: v_subrev_i32_e32 v7, vcc, v3, v1 +; SI-NEXT: v_subrev_i32_e32 v6, vcc, v2, v0 ; SI-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[2:3] +; SI-NEXT: v_subrev_i32_e32 v7, vcc, v3, v1 ; SI-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; SI-NEXT: v_add_i32_e32 v6, vcc, 1, v4 ; SI-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] @@ -428,21 +417,21 @@ define amdgpu_kernel void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; VI-NEXT: v_mul_hi_u32 v6, v4, v6 ; VI-NEXT: v_mul_hi_u32 v7, v5, v7 ; VI-NEXT: v_add_u32_e32 v4, vcc, v6, v4 -; VI-NEXT: v_add_u32_e32 v5, vcc, v5, v7 +; VI-NEXT: v_add_u32_e32 v5, vcc, v7, v5 ; VI-NEXT: v_mul_hi_u32 v4, v0, v4 ; VI-NEXT: v_mul_hi_u32 v5, v1, v5 ; VI-NEXT: v_mul_lo_u32 v6, v4, v2 ; VI-NEXT: v_mul_lo_u32 v8, v5, v3 ; VI-NEXT: v_add_u32_e32 v7, vcc, 1, v4 ; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v6 -; VI-NEXT: v_sub_u32_e32 v1, vcc, v1, v8 +; VI-NEXT: v_subrev_u32_e32 v1, vcc, v8, v1 ; VI-NEXT: v_add_u32_e32 v9, vcc, 1, v5 ; VI-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v2 ; VI-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v3 -; VI-NEXT: v_subrev_u32_e32 v6, vcc, v2, v0 ; VI-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[0:1] -; VI-NEXT: v_subrev_u32_e32 v7, vcc, v3, v1 +; VI-NEXT: v_sub_u32_e32 v6, vcc, v0, v2 ; VI-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[2:3] +; VI-NEXT: v_subrev_u32_e32 v7, vcc, v3, v1 ; VI-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; VI-NEXT: v_add_u32_e32 v6, vcc, 1, v4 ; VI-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] @@ -480,20 +469,20 @@ define amdgpu_kernel void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; GCN-NEXT: v_mul_hi_u32 v8, v7, v8 ; GCN-NEXT: v_add_u32_e32 v6, vcc, v9, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v6 -; GCN-NEXT: v_add_u32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_add_u32_e32 v7, vcc, v8, v7 ; GCN-NEXT: v_mul_hi_u32 v7, v1, v7 ; GCN-NEXT: v_mul_lo_u32 v8, v6, v2 ; GCN-NEXT: v_add_u32_e32 v9, vcc, 1, v6 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v3 ; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v8 ; GCN-NEXT: v_add_u32_e32 v11, vcc, 1, v7 -; GCN-NEXT: v_sub_u32_e32 v1, vcc, v1, v10 +; GCN-NEXT: 
v_subrev_u32_e32 v1, vcc, v10, v1 ; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v0, v2 ; GCN-NEXT: v_cmp_ge_u32_e64 s[2:3], v1, v3 -; GCN-NEXT: v_subrev_u32_e32 v8, vcc, v2, v0 ; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1] -; GCN-NEXT: v_subrev_u32_e32 v9, vcc, v3, v1 +; GCN-NEXT: v_sub_u32_e32 v8, vcc, v0, v2 ; GCN-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[2:3] +; GCN-NEXT: v_subrev_u32_e32 v9, vcc, v3, v1 ; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[0:1] ; GCN-NEXT: v_add_u32_e32 v8, vcc, 1, v6 ; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[2:3] @@ -507,58 +496,50 @@ define amdgpu_kernel void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad ; ; GFX1030-LABEL: udiv_v2i32: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX1030-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX1030-NEXT: v_mov_b32_e32 v4, 0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) -; GFX1030-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; GFX1030-NEXT: global_load_dwordx4 v[0:3], v4, s[6:7] ; GFX1030-NEXT: s_waitcnt vmcnt(0) -; GFX1030-NEXT: v_readfirstlane_b32 s2, v2 -; GFX1030-NEXT: v_readfirstlane_b32 s3, v3 -; GFX1030-NEXT: v_readfirstlane_b32 s6, v0 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v2, s2 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v3, s3 -; GFX1030-NEXT: s_sub_i32 s5, 0, s2 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; GFX1030-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX1030-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v3 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1030-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1030-NEXT: s_mul_i32 s5, s5, s4 -; GFX1030-NEXT: s_mul_hi_u32 s5, s4, s5 -; GFX1030-NEXT: s_add_i32 s4, s4, s5 -; GFX1030-NEXT: s_mul_hi_u32 s4, s6, s4 -; GFX1030-NEXT: s_mul_i32 s5, s4, s2 -; GFX1030-NEXT: s_sub_i32 s5, s6, s5 -; GFX1030-NEXT: s_add_i32 s6, s4, 1 -; GFX1030-NEXT: s_sub_i32 s7, s5, s2 -; GFX1030-NEXT: s_cmp_ge_u32 s5, s2 -; GFX1030-NEXT: s_cselect_b32 s4, s6, s4 -; GFX1030-NEXT: s_cselect_b32 s5, s7, s5 -; GFX1030-NEXT: s_add_i32 s6, s4, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s5, s2 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1030-NEXT: s_cselect_b32 s2, s6, s4 -; GFX1030-NEXT: s_sub_i32 s4, 0, s3 -; GFX1030-NEXT: v_mov_b32_e32 v0, s2 -; GFX1030-NEXT: s_mul_i32 s4, s4, s8 -; GFX1030-NEXT: s_mul_hi_u32 s4, s8, s4 -; GFX1030-NEXT: s_add_i32 s8, s8, s4 -; GFX1030-NEXT: s_mul_hi_u32 s4, s5, s8 -; GFX1030-NEXT: s_mul_i32 s6, s4, s3 -; GFX1030-NEXT: s_sub_i32 s5, s5, s6 -; GFX1030-NEXT: s_add_i32 s6, s4, 1 -; GFX1030-NEXT: s_sub_i32 s7, s5, s3 -; GFX1030-NEXT: s_cmp_ge_u32 s5, s3 -; GFX1030-NEXT: s_cselect_b32 s4, s6, s4 -; GFX1030-NEXT: s_cselect_b32 s5, s7, s5 -; GFX1030-NEXT: s_add_i32 s6, s4, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s5, s3 -; GFX1030-NEXT: s_cselect_b32 s3, s6, s4 -; GFX1030-NEXT: v_mov_b32_e32 v1, s3 -; GFX1030-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX1030-NEXT: v_cvt_f32_u32_e32 v5, v2 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GFX1030-NEXT: v_sub_nc_u32_e32 v7, 0, v2 +; GFX1030-NEXT: v_sub_nc_u32_e32 v8, 0, v3 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX1030-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5 +; GFX1030-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX1030-NEXT: v_mul_lo_u32 v7, v7, v5 +; GFX1030-NEXT: v_mul_lo_u32 v8, v8, v6 +; GFX1030-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX1030-NEXT: v_mul_hi_u32 v8, v6, v8 +; 
GFX1030-NEXT: v_add_nc_u32_e32 v5, v5, v7 +; GFX1030-NEXT: v_add_nc_u32_e32 v6, v6, v8 +; GFX1030-NEXT: v_mul_hi_u32 v5, v0, v5 +; GFX1030-NEXT: v_mul_hi_u32 v6, v1, v6 +; GFX1030-NEXT: v_mul_lo_u32 v7, v5, v2 +; GFX1030-NEXT: v_mul_lo_u32 v8, v6, v3 +; GFX1030-NEXT: v_sub_nc_u32_e32 v0, v0, v7 +; GFX1030-NEXT: v_add_nc_u32_e32 v7, 1, v5 +; GFX1030-NEXT: v_sub_nc_u32_e32 v1, v1, v8 +; GFX1030-NEXT: v_add_nc_u32_e32 v8, 1, v6 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 +; GFX1030-NEXT: v_sub_nc_u32_e32 v9, v1, v3 +; GFX1030-NEXT: v_cmp_ge_u32_e64 s0, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX1030-NEXT: v_sub_nc_u32_e32 v7, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v6, v6, v8, s0 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, v1, v9, s0 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX1030-NEXT: v_add_nc_u32_e32 v7, 1, v5 +; GFX1030-NEXT: v_add_nc_u32_e32 v8, 1, v6 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc_lo +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc_lo +; GFX1030-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] ; GFX1030-NEXT: s_endpgm ; ; EG-LABEL: udiv_v2i32: @@ -660,9 +641,9 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; SI-NEXT: v_mul_hi_u32 v11, v10, v11 ; SI-NEXT: v_mul_hi_u32 v13, v12, v13 ; SI-NEXT: v_mul_hi_u32 v15, v14, v15 -; SI-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; SI-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; SI-NEXT: v_add_i32_e32 v9, vcc, v11, v10 -; SI-NEXT: v_add_i32_e32 v10, vcc, v12, v13 +; SI-NEXT: v_add_i32_e32 v10, vcc, v13, v12 ; SI-NEXT: v_add_i32_e32 v11, vcc, v15, v14 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_mul_hi_u32 v8, v4, v8 @@ -673,7 +654,7 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; SI-NEXT: v_mul_lo_u32 v14, v9, v1 ; SI-NEXT: v_mul_lo_u32 v16, v10, v2 ; SI-NEXT: v_mul_lo_u32 v18, v11, v3 -; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v12 +; SI-NEXT: v_subrev_i32_e32 v4, vcc, v12, v4 ; SI-NEXT: v_sub_i32_e32 v5, vcc, v5, v14 ; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v16 ; SI-NEXT: v_sub_i32_e32 v7, vcc, v7, v18 @@ -685,14 +666,14 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; SI-NEXT: v_cmp_ge_u32_e64 s[2:3], v5, v1 ; SI-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 ; SI-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; SI-NEXT: v_subrev_i32_e32 v12, vcc, v0, v4 ; SI-NEXT: v_cndmask_b32_e64 v8, v8, v13, s[0:1] -; SI-NEXT: v_subrev_i32_e32 v13, vcc, v1, v5 +; SI-NEXT: v_subrev_i32_e32 v12, vcc, v0, v4 ; SI-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[2:3] -; SI-NEXT: v_subrev_i32_e32 v14, vcc, v2, v6 +; SI-NEXT: v_subrev_i32_e32 v13, vcc, v1, v5 ; SI-NEXT: v_cndmask_b32_e64 v10, v10, v17, s[4:5] -; SI-NEXT: v_subrev_i32_e32 v15, vcc, v3, v7 +; SI-NEXT: v_subrev_i32_e32 v14, vcc, v2, v6 ; SI-NEXT: v_cndmask_b32_e64 v11, v11, v19, s[6:7] +; SI-NEXT: v_sub_i32_e32 v15, vcc, v7, v3 ; SI-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1] ; SI-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; SI-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[2:3] @@ -755,8 +736,8 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; VI-NEXT: v_mul_hi_u32 v11, v10, v11 ; VI-NEXT: v_mul_hi_u32 v13, v12, v13 ; VI-NEXT: v_mul_hi_u32 v15, v14, v15 -; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v9 -; VI-NEXT: v_add_u32_e32 v9, vcc, v11, v10 +; VI-NEXT: v_add_u32_e32 v8, vcc, v9, v8 +; VI-NEXT: v_add_u32_e32 v9, vcc, v10, v11 ; VI-NEXT: v_add_u32_e32 v10, vcc, v12, 
v13 ; VI-NEXT: v_add_u32_e32 v11, vcc, v15, v14 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -769,8 +750,8 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; VI-NEXT: v_mul_lo_u32 v16, v10, v2 ; VI-NEXT: v_mul_lo_u32 v18, v11, v3 ; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v12 -; VI-NEXT: v_sub_u32_e32 v5, vcc, v5, v14 -; VI-NEXT: v_sub_u32_e32 v6, vcc, v6, v16 +; VI-NEXT: v_subrev_u32_e32 v5, vcc, v14, v5 +; VI-NEXT: v_subrev_u32_e32 v6, vcc, v16, v6 ; VI-NEXT: v_sub_u32_e32 v7, vcc, v7, v18 ; VI-NEXT: v_add_u32_e32 v13, vcc, 1, v8 ; VI-NEXT: v_add_u32_e32 v15, vcc, 1, v9 @@ -780,14 +761,14 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; VI-NEXT: v_cmp_ge_u32_e64 s[2:3], v5, v1 ; VI-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 ; VI-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; VI-NEXT: v_subrev_u32_e32 v12, vcc, v0, v4 ; VI-NEXT: v_cndmask_b32_e64 v8, v8, v13, s[0:1] -; VI-NEXT: v_subrev_u32_e32 v13, vcc, v1, v5 +; VI-NEXT: v_subrev_u32_e32 v12, vcc, v0, v4 ; VI-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[2:3] -; VI-NEXT: v_subrev_u32_e32 v14, vcc, v2, v6 +; VI-NEXT: v_subrev_u32_e32 v13, vcc, v1, v5 ; VI-NEXT: v_cndmask_b32_e64 v10, v10, v17, s[4:5] -; VI-NEXT: v_subrev_u32_e32 v15, vcc, v3, v7 +; VI-NEXT: v_subrev_u32_e32 v14, vcc, v2, v6 ; VI-NEXT: v_cndmask_b32_e64 v11, v11, v19, s[6:7] +; VI-NEXT: v_subrev_u32_e32 v15, vcc, v3, v7 ; VI-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[0:1] ; VI-NEXT: v_add_u32_e32 v12, vcc, 1, v8 ; VI-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[2:3] @@ -850,8 +831,8 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_mul_hi_u32 v13, v12, v13 ; GCN-NEXT: v_mul_hi_u32 v15, v14, v15 ; GCN-NEXT: v_mul_hi_u32 v17, v16, v17 -; GCN-NEXT: v_add_u32_e32 v10, vcc, v10, v11 -; GCN-NEXT: v_add_u32_e32 v11, vcc, v13, v12 +; GCN-NEXT: v_add_u32_e32 v10, vcc, v11, v10 +; GCN-NEXT: v_add_u32_e32 v11, vcc, v12, v13 ; GCN-NEXT: v_add_u32_e32 v12, vcc, v14, v15 ; GCN-NEXT: v_add_u32_e32 v13, vcc, v17, v16 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -864,8 +845,8 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_mul_lo_u32 v18, v12, v2 ; GCN-NEXT: v_mul_lo_u32 v19, v13, v3 ; GCN-NEXT: v_sub_u32_e32 v4, vcc, v4, v14 -; GCN-NEXT: v_sub_u32_e32 v5, vcc, v5, v16 -; GCN-NEXT: v_sub_u32_e32 v6, vcc, v6, v18 +; GCN-NEXT: v_subrev_u32_e32 v5, vcc, v16, v5 +; GCN-NEXT: v_subrev_u32_e32 v6, vcc, v18, v6 ; GCN-NEXT: v_sub_u32_e32 v7, vcc, v7, v19 ; GCN-NEXT: v_add_u32_e32 v15, vcc, 1, v10 ; GCN-NEXT: v_add_u32_e32 v17, vcc, 1, v11 @@ -875,136 +856,120 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad ; GCN-NEXT: v_cmp_ge_u32_e64 s[2:3], v5, v1 ; GCN-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 ; GCN-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; GCN-NEXT: v_subrev_u32_e32 v18, vcc, v0, v4 ; GCN-NEXT: v_cndmask_b32_e64 v10, v10, v15, s[0:1] -; GCN-NEXT: v_subrev_u32_e32 v15, vcc, v1, v5 +; GCN-NEXT: v_subrev_u32_e32 v15, vcc, v0, v4 ; GCN-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[2:3] -; GCN-NEXT: v_subrev_u32_e32 v17, vcc, v2, v6 +; GCN-NEXT: v_subrev_u32_e32 v17, vcc, v1, v5 ; GCN-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[4:5] -; GCN-NEXT: v_subrev_u32_e32 v14, vcc, v3, v7 +; GCN-NEXT: v_subrev_u32_e32 v14, vcc, v2, v6 ; GCN-NEXT: v_cndmask_b32_e64 v13, v13, v16, s[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v18, s[0:1] -; GCN-NEXT: v_add_u32_e32 v16, vcc, 1, v10 -; GCN-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[2:3] -; GCN-NEXT: v_add_u32_e32 v15, vcc, 1, v11 
-; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v17, s[4:5] -; GCN-NEXT: v_add_u32_e32 v17, vcc, 1, v12 -; GCN-NEXT: v_cndmask_b32_e64 v7, v7, v14, s[6:7] -; GCN-NEXT: v_add_u32_e32 v14, vcc, 1, v13 +; GCN-NEXT: v_subrev_u32_e32 v16, vcc, v3, v7 +; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[0:1] +; GCN-NEXT: v_add_u32_e32 v15, vcc, 1, v10 +; GCN-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[2:3] +; GCN-NEXT: v_add_u32_e32 v17, vcc, 1, v11 +; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5] +; GCN-NEXT: v_add_u32_e32 v14, vcc, 1, v12 +; GCN-NEXT: v_cndmask_b32_e64 v7, v7, v16, s[6:7] +; GCN-NEXT: v_add_u32_e32 v16, vcc, 1, v13 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v10, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v10, v15, vcc ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 -; GCN-NEXT: v_cndmask_b32_e32 v1, v11, v15, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v11, v17, vcc ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 -; GCN-NEXT: v_cndmask_b32_e32 v2, v12, v17, vcc +; GCN-NEXT: v_cndmask_b32_e32 v2, v12, v14, vcc ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3 -; GCN-NEXT: v_cndmask_b32_e32 v3, v13, v14, vcc +; GCN-NEXT: v_cndmask_b32_e32 v3, v13, v16, vcc ; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; GCN-NEXT: s_endpgm ; ; GFX1030-LABEL: udiv_v4i32: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX1030-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX1030-NEXT: v_mov_b32_e32 v8, 0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: s_clause 0x1 -; GFX1030-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] offset:16 -; GFX1030-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] +; GFX1030-NEXT: global_load_dwordx4 v[0:3], v8, s[6:7] offset:16 +; GFX1030-NEXT: global_load_dwordx4 v[4:7], v8, s[6:7] ; GFX1030-NEXT: s_waitcnt vmcnt(1) -; GFX1030-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1030-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v9, v0 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v10, v1 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v11, v2 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v12, v3 +; GFX1030-NEXT: v_sub_nc_u32_e32 v13, 0, v0 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v10, v10 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v11, v11 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v12, v12 +; GFX1030-NEXT: v_sub_nc_u32_e32 v14, 0, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v15, 0, v2 +; GFX1030-NEXT: v_sub_nc_u32_e32 v16, 0, v3 +; GFX1030-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 +; GFX1030-NEXT: v_mul_f32_e32 v10, 0x4f7ffffe, v10 +; GFX1030-NEXT: v_mul_f32_e32 v11, 0x4f7ffffe, v11 +; GFX1030-NEXT: v_mul_f32_e32 v12, 0x4f7ffffe, v12 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v12, v12 +; GFX1030-NEXT: v_mul_lo_u32 v13, v13, v9 +; GFX1030-NEXT: v_mul_lo_u32 v14, v14, v10 +; GFX1030-NEXT: v_mul_lo_u32 v15, v15, v11 +; GFX1030-NEXT: v_mul_lo_u32 v16, v16, v12 +; GFX1030-NEXT: v_mul_hi_u32 v13, v9, v13 +; GFX1030-NEXT: v_mul_hi_u32 v14, v10, v14 +; GFX1030-NEXT: v_mul_hi_u32 v15, v11, v15 +; GFX1030-NEXT: v_mul_hi_u32 v16, v12, v16 +; GFX1030-NEXT: v_add_nc_u32_e32 v9, v9, v13 +; GFX1030-NEXT: v_add_nc_u32_e32 v10, v10, v14 +; GFX1030-NEXT: v_add_nc_u32_e32 v11, v11, v15 +; GFX1030-NEXT: v_add_nc_u32_e32 v12, v12, v16 ; GFX1030-NEXT: s_waitcnt vmcnt(0) -; GFX1030-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v0, s2 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v1, s3 -; GFX1030-NEXT: s_sub_i32 s6, 
0, s2 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX1030-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v0, s5 -; GFX1030-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1030-NEXT: s_mul_i32 s6, s6, s4 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX1030-NEXT: s_mul_hi_u32 s6, s4, s6 -; GFX1030-NEXT: s_add_i32 s4, s4, s6 -; GFX1030-NEXT: s_mul_hi_u32 s4, s7, s4 -; GFX1030-NEXT: s_mul_i32 s6, s4, s2 -; GFX1030-NEXT: s_sub_i32 s6, s7, s6 -; GFX1030-NEXT: s_add_i32 s7, s4, 1 -; GFX1030-NEXT: s_sub_i32 s8, s6, s2 -; GFX1030-NEXT: s_cmp_ge_u32 s6, s2 -; GFX1030-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX1030-NEXT: s_cselect_b32 s4, s7, s4 -; GFX1030-NEXT: s_cselect_b32 s6, s8, s6 -; GFX1030-NEXT: s_add_i32 s7, s4, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s6, s2 -; GFX1030-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1030-NEXT: s_cselect_b32 s4, s7, s4 -; GFX1030-NEXT: s_sub_i32 s6, 0, s3 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1030-NEXT: s_mul_i32 s6, s6, s9 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX1030-NEXT: s_mul_hi_u32 s6, s9, s6 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX1030-NEXT: s_add_i32 s9, s9, s6 -; GFX1030-NEXT: s_mul_hi_u32 s6, s7, s9 -; GFX1030-NEXT: v_readfirstlane_b32 s10, v0 -; GFX1030-NEXT: s_mul_i32 s8, s6, s3 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX1030-NEXT: s_sub_i32 s7, s7, s8 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_sub_i32 s9, s7, s3 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s3 -; GFX1030-NEXT: s_cselect_b32 s6, s8, s6 -; GFX1030-NEXT: s_cselect_b32 s7, s9, s7 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s3 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v6 -; GFX1030-NEXT: s_cselect_b32 s3, s8, s6 -; GFX1030-NEXT: s_sub_i32 s6, 0, s5 -; GFX1030-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v1 -; GFX1030-NEXT: s_mul_i32 s6, s6, s10 -; GFX1030-NEXT: v_mov_b32_e32 v1, s3 -; GFX1030-NEXT: s_mul_hi_u32 s6, s10, s6 -; GFX1030-NEXT: s_add_i32 s10, s10, s6 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX1030-NEXT: s_mul_hi_u32 s6, s7, s10 -; GFX1030-NEXT: s_mul_i32 s8, s6, s5 -; GFX1030-NEXT: s_sub_i32 s7, s7, s8 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_sub_i32 s9, s7, s5 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s5 -; GFX1030-NEXT: v_readfirstlane_b32 s10, v0 -; GFX1030-NEXT: s_cselect_b32 s6, s8, s6 -; GFX1030-NEXT: s_cselect_b32 s7, s9, s7 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s5 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v7 -; GFX1030-NEXT: s_cselect_b32 s5, s8, s6 -; GFX1030-NEXT: s_sub_i32 s6, 0, s2 -; GFX1030-NEXT: v_mov_b32_e32 v0, s4 -; GFX1030-NEXT: s_mul_i32 s6, s6, s10 -; GFX1030-NEXT: v_mov_b32_e32 v2, s5 -; GFX1030-NEXT: s_mul_hi_u32 s6, s10, s6 -; GFX1030-NEXT: s_add_i32 s10, s10, s6 -; GFX1030-NEXT: s_mul_hi_u32 s6, s7, s10 -; GFX1030-NEXT: s_mul_i32 s8, s6, s2 -; GFX1030-NEXT: s_sub_i32 s7, s7, s8 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_sub_i32 s9, s7, s2 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s2 -; GFX1030-NEXT: s_cselect_b32 s6, s8, s6 -; GFX1030-NEXT: s_cselect_b32 s7, s9, s7 -; GFX1030-NEXT: s_add_i32 s8, s6, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s7, s2 -; GFX1030-NEXT: s_cselect_b32 s2, s8, s6 -; GFX1030-NEXT: v_mov_b32_e32 v3, s2 -; GFX1030-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX1030-NEXT: v_mul_hi_u32 v9, v4, v9 +; 
GFX1030-NEXT: v_mul_hi_u32 v10, v5, v10 +; GFX1030-NEXT: v_mul_hi_u32 v11, v6, v11 +; GFX1030-NEXT: v_mul_hi_u32 v12, v7, v12 +; GFX1030-NEXT: v_mul_lo_u32 v13, v9, v0 +; GFX1030-NEXT: v_mul_lo_u32 v14, v10, v1 +; GFX1030-NEXT: v_mul_lo_u32 v15, v11, v2 +; GFX1030-NEXT: v_mul_lo_u32 v16, v12, v3 +; GFX1030-NEXT: v_add_nc_u32_e32 v17, 1, v9 +; GFX1030-NEXT: v_add_nc_u32_e32 v18, 1, v10 +; GFX1030-NEXT: v_add_nc_u32_e32 v19, 1, v11 +; GFX1030-NEXT: v_sub_nc_u32_e32 v4, v4, v13 +; GFX1030-NEXT: v_sub_nc_u32_e32 v5, v5, v14 +; GFX1030-NEXT: v_sub_nc_u32_e32 v6, v6, v15 +; GFX1030-NEXT: v_sub_nc_u32_e32 v7, v7, v16 +; GFX1030-NEXT: v_add_nc_u32_e32 v13, 1, v12 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v4, v0 +; GFX1030-NEXT: v_sub_nc_u32_e32 v14, v4, v0 +; GFX1030-NEXT: v_cmp_ge_u32_e64 s0, v5, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v15, v5, v1 +; GFX1030-NEXT: v_cmp_ge_u32_e64 s1, v6, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e64 v10, v10, v18, s0 +; GFX1030-NEXT: v_sub_nc_u32_e32 v16, v6, v2 +; GFX1030-NEXT: v_cmp_ge_u32_e64 s2, v7, v3 +; GFX1030-NEXT: v_add_nc_u32_e32 v14, 1, v9 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, v5, v15, s0 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v4, v0 +; GFX1030-NEXT: v_cndmask_b32_e64 v11, v11, v19, s1 +; GFX1030-NEXT: v_cndmask_b32_e64 v12, v12, v13, s2 +; GFX1030-NEXT: v_sub_nc_u32_e32 v13, v7, v3 +; GFX1030-NEXT: v_add_nc_u32_e32 v15, 1, v10 +; GFX1030-NEXT: v_cndmask_b32_e64 v6, v6, v16, s1 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v9, v14, vcc_lo +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v5, v1 +; GFX1030-NEXT: v_add_nc_u32_e32 v16, 1, v11 +; GFX1030-NEXT: v_cndmask_b32_e64 v7, v7, v13, s2 +; GFX1030-NEXT: v_add_nc_u32_e32 v13, 1, v12 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v10, v15, vcc_lo +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v6, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, v11, v16, vcc_lo +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v7, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v3, v12, v13, vcc_lo +; GFX1030-NEXT: global_store_dwordx4 v8, v[0:3], s[4:5] ; GFX1030-NEXT: s_endpgm ; ; EG-LABEL: udiv_v4i32: @@ -1890,11 +1855,11 @@ define amdgpu_kernel void @v_udiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; SI-NEXT: v_mul_hi_u32 v1, v2, v1 ; SI-NEXT: v_mul_lo_u32 v3, v1, v0 ; SI-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; SI-NEXT: v_subrev_i32_e32 v2, vcc, v3, v2 +; SI-NEXT: v_cmp_ge_u32_e64 s[0:1], v2, v0 +; SI-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] ; SI-NEXT: v_subrev_i32_e32 v3, vcc, v0, v2 -; SI-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 -; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; SI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; SI-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] ; SI-NEXT: v_add_i32_e32 v3, vcc, 1, v1 ; SI-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 ; SI-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc @@ -1938,10 +1903,10 @@ define amdgpu_kernel void @v_udiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; VI-NEXT: v_mul_lo_u32 v3, v1, v0 ; VI-NEXT: v_add_u32_e32 v4, vcc, 1, v1 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 -; VI-NEXT: v_subrev_u32_e32 v3, vcc, v0, v2 -; VI-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 -; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; VI-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; VI-NEXT: v_cmp_ge_u32_e64 s[0:1], v2, v0 +; VI-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] +; VI-NEXT: v_sub_u32_e32 v3, vcc, v2, v0 +; VI-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] ; VI-NEXT: v_add_u32_e32 v3, vcc, 1, v1 ; VI-NEXT: 
v_cmp_ge_u32_e32 vcc, v2, v0 ; VI-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc @@ -1993,10 +1958,10 @@ define amdgpu_kernel void @v_udiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; GCN-NEXT: v_mul_lo_u32 v5, v4, v3 ; GCN-NEXT: v_add_u32_e32 v6, vcc, 1, v4 ; GCN-NEXT: v_sub_u32_e32 v2, vcc, v2, v5 -; GCN-NEXT: v_subrev_u32_e32 v5, vcc, v3, v2 -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v2, v3 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v2, v3 +; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] +; GCN-NEXT: v_sub_u32_e32 v5, vcc, v2, v3 +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GCN-NEXT: v_add_u32_e32 v5, vcc, 1, v4 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v2, v3 ; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc @@ -2015,39 +1980,32 @@ define amdgpu_kernel void @v_udiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* ; GFX1030-NEXT: global_load_ubyte v3, v0, s[2:3] offset:2 ; GFX1030-NEXT: global_load_ushort v4, v0, s[2:3] ; GFX1030-NEXT: s_waitcnt vmcnt(3) -; GFX1030-NEXT: v_readfirstlane_b32 s2, v1 -; GFX1030-NEXT: s_waitcnt vmcnt(2) -; GFX1030-NEXT: v_readfirstlane_b32 s3, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX1030-NEXT: s_waitcnt vmcnt(1) -; GFX1030-NEXT: v_readfirstlane_b32 s4, v3 +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX1030-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX1030-NEXT: s_waitcnt vmcnt(0) -; GFX1030-NEXT: v_readfirstlane_b32 s5, v4 -; GFX1030-NEXT: s_lshl_b32 s2, s2, 16 -; GFX1030-NEXT: s_or_b32 s2, s3, s2 -; GFX1030-NEXT: s_lshl_b32 s4, s4, 16 -; GFX1030-NEXT: v_cvt_f32_u32_e32 v1, s2 -; GFX1030-NEXT: s_sub_i32 s6, 0, s2 -; GFX1030-NEXT: s_or_b32 s4, s5, s4 -; GFX1030-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX1030-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX1030-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX1030-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1030-NEXT: s_mul_i32 s6, s6, s3 -; GFX1030-NEXT: s_mul_hi_u32 s6, s3, s6 -; GFX1030-NEXT: s_add_i32 s3, s3, s6 -; GFX1030-NEXT: s_mul_hi_u32 s3, s4, s3 -; GFX1030-NEXT: s_mul_i32 s5, s3, s2 -; GFX1030-NEXT: s_sub_i32 s4, s4, s5 -; GFX1030-NEXT: s_add_i32 s5, s3, 1 -; GFX1030-NEXT: s_sub_i32 s6, s4, s2 -; GFX1030-NEXT: s_cmp_ge_u32 s4, s2 -; GFX1030-NEXT: s_cselect_b32 s3, s5, s3 -; GFX1030-NEXT: s_cselect_b32 s4, s6, s4 -; GFX1030-NEXT: s_add_i32 s5, s3, 1 -; GFX1030-NEXT: s_cmp_ge_u32 s4, s2 -; GFX1030-NEXT: s_cselect_b32 s2, s5, s3 -; GFX1030-NEXT: s_and_b32 s2, s2, 0xffffff -; GFX1030-NEXT: v_mov_b32_e32 v1, s2 +; GFX1030-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX1030-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v5, 0, v1 +; GFX1030-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX1030-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX1030-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX1030-NEXT: v_mul_lo_u32 v5, v5, v2 +; GFX1030-NEXT: v_mul_hi_u32 v5, v2, v5 +; GFX1030-NEXT: v_add_nc_u32_e32 v2, v2, v5 +; GFX1030-NEXT: v_mul_hi_u32 v2, v3, v2 +; GFX1030-NEXT: v_mul_lo_u32 v4, v2, v1 +; GFX1030-NEXT: v_sub_nc_u32_e32 v3, v3, v4 +; GFX1030-NEXT: v_add_nc_u32_e32 v4, 1, v2 +; GFX1030-NEXT: v_sub_nc_u32_e32 v5, v3, v1 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v3, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo +; GFX1030-NEXT: v_add_nc_u32_e32 v4, 1, v2 +; GFX1030-NEXT: v_cmp_ge_u32_e32 vcc_lo, v3, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1030-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX1030-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1030-NEXT: s_endpgm ; @@ -2394,7 +2352,7 @@ 
define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readon ; SI-NEXT: v_cvt_i32_f32_e32 v1, v1 ; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -2420,7 +2378,7 @@ define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readon ; VI-NEXT: v_cvt_i32_f32_e32 v1, v1 ; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| ; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -2444,7 +2402,7 @@ define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readon ; GCN-NEXT: v_cvt_i32_f32_e32 v3, v3 ; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v4| ; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-NEXT: v_add_u32_e32 v2, vcc, v3, v2 +; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GCN-NEXT: flat_store_byte v[0:1], v2 ; GCN-NEXT: s_endpgm ; @@ -2564,7 +2522,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_mul_lo_u32 v5, v3, s4 ; SI-NEXT: v_mul_lo_u32 v6, v2, s4 ; SI-NEXT: s_mov_b32 s4, 0x186a0 -; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v2 +; SI-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; SI-NEXT: v_mul_lo_u32 v5, v2, v4 ; SI-NEXT: v_mul_hi_u32 v7, v2, v6 @@ -2640,8 +2598,8 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; VI-NEXT: v_cvt_u32_f32_e32 v7, v3 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v6 -; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 +; VI-NEXT: v_add_u32_e32 v8, vcc, v3, v4 ; VI-NEXT: v_mul_hi_u32 v5, v6, v2 ; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 ; VI-NEXT: v_add_u32_e32 v9, vcc, v5, v3 @@ -2657,7 +2615,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; VI-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v6 +; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 ; VI-NEXT: v_add_u32_e32 v5, vcc, v3, v4 ; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 ; VI-NEXT: v_mul_hi_u32 v8, v6, v2 @@ -2727,8 +2685,8 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; GCN-NEXT: v_cvt_u32_f32_e32 v7, v3 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6 -; GCN-NEXT: v_sub_u32_e32 v3, vcc, v3, v6 -; GCN-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 +; GCN-NEXT: v_add_u32_e32 v8, vcc, v3, v4 ; GCN-NEXT: v_mul_hi_u32 v5, v6, v2 ; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 ; GCN-NEXT: v_add_u32_e32 v9, vcc, v5, v3 @@ -2744,7 +2702,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6 -; GCN-NEXT: v_sub_u32_e32 v3, vcc, v3, v6 +; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 ; GCN-NEXT: v_add_u32_e32 v5, vcc, v3, v4 ; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 ; GCN-NEXT: v_mul_hi_u32 v8, v6, v2 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 3c3c77b1a5331..a9196aa575ee6 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -48,9 +48,9 @@ 
define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_mul_hi_u32 v3, s4, v0 ; GCN-NEXT: v_mul_lo_u32 v4, s5, v0 ; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_mul_lo_u32 v3, s4, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -100,13 +100,12 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] ; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 ; GCN-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] -; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 1, v0 +; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GCN-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] -; GCN-NEXT: v_add_i32_e64 v7, s[0:1], 2, v0 +; GCN-NEXT: v_add_i32_e64 v7, s[0:1], 1, v0 ; GCN-NEXT: v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 -; GCN-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v5, v6, v8, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GCN-NEXT: v_mov_b32_e32 v6, s3 ; GCN-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 @@ -116,8 +115,9 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s9, v2 ; GCN-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; @@ -125,41 +125,38 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: s_flbit_i32_b32 s10, s4 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s11, s5 -; GCN-IR-NEXT: s_add_i32 s10, s10, 32 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; GCN-IR-NEXT: s_min_u32 s10, s10, s11 -; GCN-IR-NEXT: s_min_u32 s14, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s12, s10, s14 -; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[8:9], s[16:17] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[16:17], exec -; GCN-IR-NEXT: s_cselect_b32 s9, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s8, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 +; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s12, s4 +; GCN-IR-NEXT: s_add_i32 s14, s12, 32 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[10:11] +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s5 +; GCN-IR-NEXT: s_min_u32 s10, s14, s8 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s9, 
s3 +; GCN-IR-NEXT: s_min_u32 s14, s8, s9 +; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 +; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s16, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s17, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[16:17], 0 -; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 +; GCN-IR-NEXT: s_add_u32 s12, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s16 +; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s12 ; GCN-IR-NEXT: s_add_u32 s15, s4, -1 ; GCN-IR-NEXT: s_addc_u32 s16, s5, -1 ; GCN-IR-NEXT: s_not_b64 s[2:3], s[10:11] @@ -190,12 +187,18 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[8:9], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[2:3] -; GCN-IR-NEXT: .LBB0_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 +; GCN-IR-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB0_6 +; GCN-IR-NEXT: .LBB0_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[12:13] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[12:13] +; GCN-IR-NEXT: .LBB0_6: ; %udiv-end ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s9 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %result = udiv i64 %x, %y @@ -668,36 +671,36 @@ define amdgpu_kernel void @s_test_udiv23_i64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 %y) { ; GCN-LABEL: s_test_udiv24_i48: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_and_b32 s0, s2, 0xff000000 -; GCN-NEXT: s_and_b32 s1, s3, 0xffff -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GCN-NEXT: s_and_b32 s4, s4, 0xff000000 +; GCN-NEXT: s_and_b32 s5, s5, 0xffff +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_alignbit_b32 v0, s5, v0, 24 ; GCN-NEXT: v_cvt_f32_u32_e32 v1, v0 -; GCN-NEXT: s_and_b32 s7, s7, 0xffff -; GCN-NEXT: s_and_b32 s6, s6, 0xff000000 -; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], 24 +; GCN-NEXT: s_and_b32 s8, s3, 0xffff +; GCN-NEXT: s_and_b32 s9, s2, 0xff000000 +; GCN-NEXT: s_lshr_b64 s[2:3], s[4:5], 24 ; GCN-NEXT: v_mac_f32_e32 v1, 0, v2 ; GCN-NEXT: v_rcp_f32_e32 v1, v1 -; GCN-NEXT: s_sub_u32 s8, 0, s0 -; GCN-NEXT: s_subb_u32 s9, 0, 
s1 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_sub_u32 s2, 0, s2 +; GCN-NEXT: s_subb_u32 s3, 0, s3 +; GCN-NEXT: s_mov_b32 s4, s0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; GCN-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 ; GCN-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GCN-NEXT: s_mov_b32 s2, -1 -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: v_mul_lo_u32 v3, s8, v2 -; GCN-NEXT: v_mul_hi_u32 v4, s8, v1 -; GCN-NEXT: v_mul_lo_u32 v5, s9, v1 -; GCN-NEXT: v_mul_lo_u32 v6, s8, v1 -; GCN-NEXT: s_mov_b32 s1, s5 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mul_lo_u32 v3, s2, v2 +; GCN-NEXT: v_mul_hi_u32 v4, s2, v1 +; GCN-NEXT: v_mul_lo_u32 v5, s3, v1 +; GCN-NEXT: v_mul_lo_u32 v6, s2, v1 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v5, v3 ; GCN-NEXT: v_mul_lo_u32 v4, v1, v3 @@ -716,11 +719,11 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; GCN-NEXT: v_mul_lo_u32 v3, s8, v2 -; GCN-NEXT: v_mul_hi_u32 v4, s8, v1 -; GCN-NEXT: v_mul_lo_u32 v5, s9, v1 +; GCN-NEXT: v_mul_lo_u32 v3, s2, v2 +; GCN-NEXT: v_mul_hi_u32 v4, s2, v1 +; GCN-NEXT: v_mul_lo_u32 v5, s3, v1 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3 -; GCN-NEXT: v_mul_lo_u32 v4, s8, v1 +; GCN-NEXT: v_mul_lo_u32 v4, s2, v1 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v5, v3 ; GCN-NEXT: v_mul_lo_u32 v7, v1, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v1, v4 @@ -737,9 +740,9 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc ; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v3 -; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_mov_b32_e32 v3, s9 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; GCN-NEXT: v_alignbit_b32 v3, s7, v3, 24 +; GCN-NEXT: v_alignbit_b32 v3, s8, v3, 24 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v2 ; GCN-NEXT: v_mul_hi_u32 v1, v3, v1 ; GCN-NEXT: v_mul_hi_u32 v2, v3, v2 @@ -753,12 +756,12 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v1 -; GCN-NEXT: v_add_i32_e32 v4, vcc, 1, v1 +; GCN-NEXT: v_add_i32_e32 v4, vcc, 2, v1 ; GCN-NEXT: v_mul_lo_u32 v10, v0, v1 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v2, vcc -; GCN-NEXT: v_add_i32_e32 v8, vcc, 2, v1 +; GCN-NEXT: v_add_i32_e32 v8, vcc, 1, v1 ; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v2, vcc -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GCN-NEXT: v_sub_i32_e32 v3, vcc, v3, v10 ; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v6, vcc ; GCN-NEXT: v_sub_i32_e32 v7, vcc, v3, v0 @@ -766,25 +769,24 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v7, v0 ; GCN-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GCN-NEXT: v_cmp_ge_u32_e64 s[0:1], v3, v0 ; GCN-NEXT: v_cndmask_b32_e32 v7, -1, v7, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 
vcc, 0, v6 -; GCN-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc -; GCN-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v0, -1, v0, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0 +; GCN-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v1, v9, v5, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[0:1] +; GCN-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_udiv24_i48: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_and_b32 s3, s5, 0xffff ; GCN-IR-NEXT: s_and_b32 s2, s4, 0xff000000 @@ -794,39 +796,37 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_lshr_b64 s[2:3], s[4:5], 24 ; GCN-IR-NEXT: s_and_b32 s9, s9, 0xffff ; GCN-IR-NEXT: s_and_b32 s3, s3, 0xffff -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[8:9], 0 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_min_u32 s10, s4, s5 -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s8 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s9 -; GCN-IR-NEXT: s_min_u32 s14, s4, s5 -; GCN-IR-NEXT: s_sub_u32 s12, s10, s14 -; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[6:7], s[16:17] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[16:17], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s9 -; GCN-IR-NEXT: s_cselect_b32 s6, 0, s8 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[10:11] +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_min_u32 s10, s6, s7 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s8 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s9 +; GCN-IR-NEXT: s_min_u32 s14, s6, s7 +; GCN-IR-NEXT: s_sub_u32 s6, s10, s14 +; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s16, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s17, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[16:17], 0 -; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[8:9], s12 +; GCN-IR-NEXT: s_add_u32 s12, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s13, s7, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 0 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: 
s_lshl_b64 s[6:7], s[8:9], s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[8:9], s16 +; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 ; GCN-IR-NEXT: s_add_u32 s15, s2, -1 ; GCN-IR-NEXT: s_addc_u32 s16, s3, -1 ; GCN-IR-NEXT: s_not_b64 s[4:5], s[10:11] @@ -857,16 +857,22 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow3 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[2:3] -; GCN-IR-NEXT: .LBB7_5: ; %udiv-end +; GCN-IR-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB7_6 +; GCN-IR-NEXT: .LBB7_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s9 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[12:13] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[12:13] +; GCN-IR-NEXT: .LBB7_6: ; %udiv-end ; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s7 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s6 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 -; GCN-IR-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; GCN-IR-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 +; GCN-IR-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %1 = lshr i48 %x, 24 %2 = lshr i48 %y, 24 @@ -961,57 +967,54 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v3 ; GCN-NEXT: v_cndmask_b32_e64 v3, v5, v4, s[0:1] -; GCN-NEXT: v_add_i32_e64 v4, s[0:1], 1, v0 +; GCN-NEXT: v_add_i32_e64 v4, s[0:1], 2, v0 ; GCN-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1] -; GCN-NEXT: v_add_i32_e64 v6, s[0:1], 2, v0 +; GCN-NEXT: v_add_i32_e64 v6, s[0:1], 1, v0 ; GCN-NEXT: v_addc_u32_e64 v7, s[0:1], 0, 0, s[0:1] ; GCN-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v3 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v6, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 ; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v6, v4, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_udiv_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_min_u32 s8, s8, s9 -; GCN-IR-NEXT: s_add_u32 s10, s8, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63 -; GCN-IR-NEXT: 
s_or_b64 s[12:13], s[6:7], s[12:13] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[12:13], exec -; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] -; GCN-IR-NEXT: s_mov_b32 s7, 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 +; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s12, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[12:13], 0 -; GCN-IR-NEXT: s_sub_i32 s9, 63, s10 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s9 +; GCN-IR-NEXT: s_add_u32 s10, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s11, s7, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[10:11], 0 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s12 +; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 ; GCN-IR-NEXT: s_add_u32 s14, s2, -1 ; GCN-IR-NEXT: s_addc_u32 s15, s3, -1 ; GCN-IR-NEXT: s_sub_u32 s8, 58, s8 @@ -1041,12 +1044,16 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[2:3] -; GCN-IR-NEXT: .LBB8_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB8_6 +; GCN-IR-NEXT: .LBB8_5: +; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 24, 0, s[10:11] +; GCN-IR-NEXT: .LBB8_6: ; %udiv-end ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %result = udiv i64 24, %x @@ -1329,22 +1336,21 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mov_b32_e32 v0, 0x4f800000 ; GCN-NEXT: v_madak_f32 v0, 0, v0, 0x41c00000 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 -; GCN-NEXT: s_movk_i32 s8, 0xffe8 -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_movk_i32 s4, 0xffe8 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 ; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: v_mul_hi_u32 v2, v0, s8 -; GCN-NEXT: v_mul_lo_u32 v4, v1, s8 -; GCN-NEXT: v_mul_lo_u32 v3, v0, s8 -; GCN-NEXT: s_mov_b32 s1, s5 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: 
s_mov_b32 s6, -1 +; GCN-NEXT: v_mul_hi_u32 v2, v0, s4 +; GCN-NEXT: v_mul_lo_u32 v4, v1, s4 +; GCN-NEXT: v_mul_lo_u32 v3, v0, s4 ; GCN-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v3 @@ -1363,11 +1369,12 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GCN-NEXT: v_mul_hi_u32 v2, v0, s8 -; GCN-NEXT: v_mul_lo_u32 v3, v1, s8 -; GCN-NEXT: v_mul_lo_u32 v4, v0, s8 -; GCN-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GCN-NEXT: v_mul_hi_u32 v2, v0, s4 +; GCN-NEXT: v_mul_lo_u32 v3, v1, s4 +; GCN-NEXT: v_mul_lo_u32 v4, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_mul_lo_u32 v3, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v4 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -1384,15 +1391,15 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GCN-NEXT: v_mul_lo_u32 v2, s6, v1 -; GCN-NEXT: v_mul_hi_u32 v3, s6, v0 -; GCN-NEXT: v_mul_hi_u32 v4, s6, v1 -; GCN-NEXT: v_mul_hi_u32 v5, s7, v1 -; GCN-NEXT: v_mul_lo_u32 v1, s7, v1 +; GCN-NEXT: v_mul_lo_u32 v2, s2, v1 +; GCN-NEXT: v_mul_hi_u32 v3, s2, v0 +; GCN-NEXT: v_mul_hi_u32 v4, s2, v1 +; GCN-NEXT: v_mul_hi_u32 v5, s3, v1 +; GCN-NEXT: v_mul_lo_u32 v1, s3, v1 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GCN-NEXT: v_mul_lo_u32 v4, s7, v0 -; GCN-NEXT: v_mul_hi_u32 v0, s7, v0 +; GCN-NEXT: v_mul_lo_u32 v4, s3, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s3, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc @@ -1400,14 +1407,14 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v1, 24 ; GCN-NEXT: v_mul_hi_u32 v5, v0, 24 -; GCN-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GCN-NEXT: v_add_i32_e32 v2, vcc, 2, v0 ; GCN-NEXT: v_mul_lo_u32 v8, v0, 24 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GCN-NEXT: v_add_i32_e32 v6, vcc, 2, v0 +; GCN-NEXT: v_add_i32_e32 v6, vcc, 1, v0 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GCN-NEXT: v_mov_b32_e32 v5, s7 -; GCN-NEXT: v_sub_i32_e32 v8, vcc, s6, v8 +; GCN-NEXT: v_mov_b32_e32 v5, s3 +; GCN-NEXT: v_sub_i32_e32 v8, vcc, s2, v8 ; GCN-NEXT: v_subb_u32_e32 v4, vcc, v5, v4, vcc ; GCN-NEXT: v_subrev_i32_e32 v5, vcc, 24, v8 ; GCN-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v4, vcc @@ -1415,17 +1422,17 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 ; GCN-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], 23, v8 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 23, v8 -; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GCN-NEXT: v_cndmask_b32_e32 v4, -1, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; 
GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 +; GCN-NEXT: v_cndmask_b32_e64 v4, -1, v5, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v4 +; GCN-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_udiv_k_den_i64: @@ -1436,29 +1443,26 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 ; GCN-IR-NEXT: s_add_i32 s6, s6, 32 ; GCN-IR-NEXT: s_min_u32 s10, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s8, 59, s10 -; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 +; GCN-IR-NEXT: s_sub_u32 s6, 59, s10 +; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[8:9], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[8:9], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s12, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[12:13], 0 -; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s8 +; GCN-IR-NEXT: s_add_u32 s8, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s9, s7, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 0 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[8:9], s[2:3], s12 +; GCN-IR-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 ; GCN-IR-NEXT: s_add_u32 s2, s10, 0xffffffc4 ; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 @@ -1485,12 +1489,18 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_3 ; GCN-IR-NEXT: .LBB11_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[2:3] -; GCN-IR-NEXT: .LBB11_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s3 +; GCN-IR-NEXT: s_branch .LBB11_6 +; GCN-IR-NEXT: .LBB11_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[8:9] +; GCN-IR-NEXT: .LBB11_6: ; %udiv-end ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 -; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-IR-NEXT: s_endpgm %result = udiv i64 %x, 24 diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll 
index e1a79bf1e9c7a..58c1445b9f303 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll @@ -36,84 +36,77 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, [8 x i32], i32 ; ; GFX6-LABEL: test_udivrem: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s8, s[0:1], 0x26 -; GFX6-NEXT: s_load_dword s9, s[0:1], 0x1d +; GFX6-NEXT: s_load_dword s2, s[0:1], 0x26 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x13 +; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x13 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s10, s6 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX6-NEXT: s_sub_i32 s2, 0, s8 -; GFX6-NEXT: s_mov_b32 s3, s7 +; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX6-NEXT: s_sub_i32 s3, 0, s2 +; GFX6-NEXT: s_mov_b32 s11, s7 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: s_mov_b32 s2, s6 +; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x1d ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX6-NEXT: v_readfirstlane_b32 s10, v0 -; GFX6-NEXT: s_mul_i32 s10, s10, s8 -; GFX6-NEXT: s_sub_i32 s9, s9, s10 -; GFX6-NEXT: s_sub_i32 s10, s9, s8 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s9, s8 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: s_cselect_b32 s9, s10, s9 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_sub_i32 s10, s9, s8 -; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0 -; GFX6-NEXT: s_cmp_ge_u32 s9, s8 -; GFX6-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX6-NEXT: s_cselect_b32 s8, s10, s9 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v0, s2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s3, v1 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v1 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s8 -; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v1, v2, s[0:1] +; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX6-NEXT: s_endpgm ; ; GFX8-LABEL: test_udivrem: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x98 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x74 +; GFX8-NEXT: s_load_dword s6, s[0:1], 0x98 +; GFX8-NEXT: s_load_dword s7, s[0:1], 0x74 +; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s4 -; GFX8-NEXT: s_sub_i32 s2, 0, s4 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX8-NEXT: s_sub_i32 s2, 0, s6 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_lo_u32 v1, s2, v0 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4c ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; GFX8-NEXT: v_mul_hi_u32 v4, s5, v0 +; GFX8-NEXT: v_mul_hi_u32 v2, s7, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_readfirstlane_b32 s0, v4 -; GFX8-NEXT: s_mul_i32 s0, s0, s4 -; GFX8-NEXT: s_sub_i32 s0, s5, s0 -; GFX8-NEXT: s_sub_i32 s1, s0, s4 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, 1, v4 -; GFX8-NEXT: s_cmp_ge_u32 s0, s4 -; GFX8-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX8-NEXT: s_cselect_b32 s0, s1, s0 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GFX8-NEXT: s_sub_i32 s1, s0, s4 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, 1, v4 -; GFX8-NEXT: s_cmp_ge_u32 s0, s4 -; GFX8-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GFX8-NEXT: s_cselect_b32 s0, s1, s0 -; GFX8-NEXT: flat_store_dword v[0:1], v4 -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: v_mul_lo_u32 v3, v2, s6 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s7, v3 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v3 +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s6, v3 +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v3 +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s6, v3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[0:1] +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm %result0 = udiv i32 %x, %y store i32 %result0, i32 addrspace(1)* %out0 @@ -165,47 +158,43 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s7 ; GFX6-NEXT: s_sub_i32 s2, 0, s6 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s7 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s6 -; GFX6-NEXT: s_sub_i32 s2, s4, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s6 -; GFX6-NEXT: s_cmp_ge_u32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s6 -; GFX6-NEXT: s_cmp_ge_u32 s2, s6 -; GFX6-NEXT: s_cselect_b32 s4, s3, s2 +; GFX6-NEXT: v_mul_lo_u32 v2, s2, v0 ; GFX6-NEXT: s_sub_i32 s2, 0, s7 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: v_mul_lo_u32 v3, s2, v1 ; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_readfirstlane_b32 s6, v0 -; GFX6-NEXT: s_mul_i32 s6, s6, s7 -; GFX6-NEXT: s_sub_i32 s5, s5, s6 -; GFX6-NEXT: s_sub_i32 s6, s5, s7 -; GFX6-NEXT: 
s_cmp_ge_u32 s5, s7 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 -; GFX6-NEXT: s_sub_i32 s6, s5, s7 -; GFX6-NEXT: s_cmp_ge_u32 s5, s7 -; GFX6-NEXT: s_cselect_b32 s5, s6, s5 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s6 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s7 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s6, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s6, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s7, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -241,7 +230,7 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3 ; GFX8-NEXT: v_mul_lo_u32 v0, s3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0 ; GFX8-NEXT: v_mul_hi_u32 v1, s5, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_readfirstlane_b32 s2, v1 @@ -334,85 +323,77 @@ define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i3 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX6-NEXT: s_sub_i32 s2, 0, s8 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s9 +; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s9 +; GFX6-NEXT: s_sub_i32 s12, 0, s8 +; GFX6-NEXT: s_sub_i32 s13, 0, s9 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f32_u32_e32 v3, s10 +; GFX6-NEXT: v_cvt_f32_u32_e32 v5, s11 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s10 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s8 -; GFX6-NEXT: s_sub_i32 s2, s4, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s8 -; GFX6-NEXT: s_cmp_ge_u32 s2, s8 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s8 -; GFX6-NEXT: s_cmp_ge_u32 s2, s8 -; GFX6-NEXT: s_cselect_b32 s4, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s9 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_cvt_f32_u32_e32 v2, s11 -; GFX6-NEXT: v_readfirstlane_b32 
s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s9 -; GFX6-NEXT: s_sub_i32 s2, s5, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s9 -; GFX6-NEXT: s_cmp_ge_u32 s2, s9 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s9 -; GFX6-NEXT: s_cmp_ge_u32 s2, s9 -; GFX6-NEXT: s_cselect_b32 s5, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s10 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s2, s2, s10 -; GFX6-NEXT: s_sub_i32 s2, s6, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s10 -; GFX6-NEXT: s_cmp_ge_u32 s2, s10 -; GFX6-NEXT: s_cselect_b32 s2, s3, s2 -; GFX6-NEXT: s_sub_i32 s3, s2, s10 -; GFX6-NEXT: s_cmp_ge_u32 s2, s10 -; GFX6-NEXT: s_cselect_b32 s6, s3, s2 -; GFX6-NEXT: s_sub_i32 s2, 0, s11 -; GFX6-NEXT: v_mul_lo_u32 v0, s2, v1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_mul_hi_u32 v2, s7, v0 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: v_mov_b32_e32 v1, s5 -; GFX6-NEXT: v_readfirstlane_b32 s4, v2 -; GFX6-NEXT: s_mul_i32 s4, s4, s11 -; GFX6-NEXT: s_sub_i32 s4, s7, s4 -; GFX6-NEXT: s_sub_i32 s5, s4, s11 -; GFX6-NEXT: s_cmp_ge_u32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: s_sub_i32 s5, s4, s11 -; GFX6-NEXT: s_cmp_ge_u32 s4, s11 -; GFX6-NEXT: s_cselect_b32 s4, s5, s4 -; GFX6-NEXT: v_mov_b32_e32 v2, s6 -; GFX6-NEXT: v_mov_b32_e32 v3, s4 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, s12, v0 +; GFX6-NEXT: v_mul_lo_u32 v4, s13, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v4, v1, v4 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, s8 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, s9 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s8, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GFX6-NEXT: s_sub_i32 s4, 0, s10 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_mul_lo_u32 v3, s4, v2 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s9, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; GFX6-NEXT: s_sub_i32 s4, 0, s11 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v4 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s9, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, s6, v2 +; GFX6-NEXT: v_mul_lo_u32 v5, s4, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-NEXT: v_mul_lo_u32 v2, v2, s10 +; GFX6-NEXT: v_mul_hi_u32 v4, v3, v5 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s6, v2 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: v_mul_hi_u32 v3, s7, v3 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s10, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX6-NEXT: v_mul_lo_u32 
v3, v3, s11 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s10, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s7, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s11, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s11, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -468,7 +449,7 @@ define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i3 ; GFX8-NEXT: v_mul_lo_u32 v0, s4, v1 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: v_mul_hi_u32 v0, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v0 ; GFX8-NEXT: v_mul_hi_u32 v0, s6, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 ; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index a7b85f62b0504..b55a429cc7a89 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -76,9 +76,12 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1) ; GCN-LABEL: {{^}}uint_to_fp_i1_to_f64: ; VI-DAG: s_cmp_eq_u32 -; GCN-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0x3ff00000, 0 +; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0x3ff00000, 0 +; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] +; SI-DAG: s_cmp_eq_u32 +; SI-DAG: s_cselect_b64 vcc, -1, 0 +; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, {{v[0-9]+}}, vcc ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]] ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[ZERO]]:[[SEL]]] ; GCN: s_endpgm define amdgpu_kernel void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir index 97c63bdfcd1a7..71b5afef41146 100644 --- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir @@ -70,16 +70,16 @@ body: | ; CHECK-NEXT: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) - ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable 
$sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; 
CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; CHECK-NEXT: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; CHECK-NEXT: KILL killed renamable $sgpr92_sgpr93_sgpr94_sgpr95 ; CHECK-NEXT: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 @@ -147,15 +147,15 @@ body: | %23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0 :: (load (s128), addrspace 6) %24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load (s128), addrspace 6) %25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0 :: (load (s128), addrspace 6) - %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 
1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) %35:vgpr_32 = nofpexcept V_MAX_F32_e32 %26, %27, implicit $mode, implicit $exec %36:vgpr_32 = V_MAX3_F32_e64 0, %35, 0, %28, 0, %29, 0, 0, implicit $mode, implicit $exec %37:vgpr_32 = nofpexcept V_ADD_F32_e32 -1083321614, %31, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir index 103a8c025fe23..6fe5746f571c3 100644 --- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -17,10 +17,10 @@ # leaving a spill of the undefined register. # CHECK-LABEL: name: undefined_physreg_sgpr_spill -# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec # CHECK-NEXT: SI_SPILL_S64_SAVE $sgpr0_sgpr1, # CHECK-NEXT: $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc -# CHECK: $exec = COPY killed $sgpr2_sgpr3 +# CHECK: $exec = PRED_COPY killed $sgpr2_sgpr3 name: undefined_physreg_sgpr_spill alignment: 1 exposesReturnsTwice: false @@ -43,12 +43,12 @@ body: | successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec + $vgpr1_vgpr2 = PRED_COPY killed $sgpr4_sgpr5, implicit $exec $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec - $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 @@ -74,7 +74,7 @@ body: | bb.3: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 - $vcc = COPY $vgpr1 + $vcc = PRED_COPY $vgpr1 S_ENDPGM 0 ... 
@@ -82,10 +82,10 @@ body: | # Move spill to after future save instruction # CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder -# CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec +# CHECK: $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc # CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) -# CHECK: $exec = COPY killed $sgpr2_sgpr3 +# CHECK: $exec = PRED_COPY killed $sgpr2_sgpr3 name: undefined_physreg_sgpr_spill_reorder alignment: 1 exposesReturnsTwice: false @@ -108,12 +108,12 @@ body: | successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec + $vgpr1_vgpr2 = PRED_COPY killed $sgpr4_sgpr5, implicit $exec $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec - $sgpr0_sgpr1 = COPY $exec, implicit-def $exec + $sgpr0_sgpr1 = PRED_COPY $exec, implicit-def $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $exec = S_MOV_B64_term killed $sgpr2_sgpr3 @@ -139,7 +139,7 @@ body: | bb.3: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 - $vcc = COPY $vgpr1 + $vcc = PRED_COPY $vgpr1 S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 081ff2a939550..3e6ebb48969bb 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -6,11 +6,12 @@ define hidden void @widget() { ; GCN-LABEL: widget: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 2 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 @@ -51,11 +52,12 @@ define hidden void @widget() { ; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 2 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; SI-OPT-LABEL: @widget( @@ -183,11 +185,12 @@ define hidden void @blam() { ; GCN-LABEL: blam: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 18 +; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill @@ -212,14 +215,14 @@ define hidden void @blam() { ; GCN-NEXT: v_writelane_b32 v40, s49, 15 ; GCN-NEXT: v_writelane_b32 v40, s30, 16 ; GCN-NEXT: v_writelane_b32 v40, s31, 17 -; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s44, s15 +; GCN-NEXT: s_mov_b64 s[34:35], s[6:7] +; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s45, s14 ; GCN-NEXT: s_mov_b32 s46, s13 ; GCN-NEXT: s_mov_b32 s47, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN-NEXT: s_mov_b64 s[36:37], s[10:11] +; GCN-NEXT: s_mov_b64 s[38:39], s[8:9] ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] ; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -276,9 +279,9 @@ define hidden void @blam() { ; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2 ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: 
s_mov_b64 s[10:11], s[34:35] +; GCN-NEXT: s_mov_b64 s[6:7], s[34:35] +; GCN-NEXT: s_mov_b64 s[8:9], s[38:39] +; GCN-NEXT: s_mov_b64 s[10:11], s[36:37] ; GCN-NEXT: s_mov_b32 s12, s47 ; GCN-NEXT: s_mov_b32 s13, s46 ; GCN-NEXT: s_mov_b32 s14, s45 diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-code-object-version.ll b/llvm/test/CodeGen/AMDGPU/unsupported-code-object-version.ll new file mode 100644 index 0000000000000..4ee7538d78852 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/unsupported-code-object-version.ll @@ -0,0 +1,8 @@ +; RUN: sed 's/CODE_OBJECT_VERSION/0/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s +; RUN: sed 's/CODE_OBJECT_VERSION/100/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s +; RUN: sed 's/CODE_OBJECT_VERSION/9900/g' %s | not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 2>&1 | FileCheck --check-prefix=HSA-ERROR %s + +; HSA-ERROR: Unexpected code object version + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION} diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll index 42b81236e55ef..f27d8fd88b8bb 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll @@ -5,7 +5,7 @@ ; feature, and instead generates a selection error. ; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: load_1d) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) (in function: load_1d) define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll index d50d9166d708c..058b339989761 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll @@ -8,7 +8,7 @@ ; generates a selection error. 
; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.sample.d.1d -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s32), %{{[0-9]+}}:_(<8 x s32>), %{{[0-9]+}}:_(<4 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: sample_d_1d) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s32), %{{[0-9]+}}:_(<8 x s32>), %{{[0-9]+}}:_(<4 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) (in function: sample_d_1d) define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 33038bb4fe238..dbec6f144b4c6 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_mul_lo_u32 v4, s1, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_mul_lo_u32 v3, s0, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -104,19 +104,19 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; GCN-NEXT: v_mov_b32_e32 v5, s11 +; GCN-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s13, v1 ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s12, v0 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s13, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; @@ -124,41 +124,38 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: s_flbit_i32_b32 s10, s4 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s11, s5 -; GCN-IR-NEXT: s_add_i32 s10, s10, 32 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; 
GCN-IR-NEXT: s_min_u32 s10, s10, s11 -; GCN-IR-NEXT: s_min_u32 s14, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s12, s10, s14 -; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[8:9], s[16:17] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[16:17], exec -; GCN-IR-NEXT: s_cselect_b32 s9, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s8, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 +; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s12, s4 +; GCN-IR-NEXT: s_add_i32 s14, s12, 32 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[10:11] +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s5 +; GCN-IR-NEXT: s_min_u32 s10, s14, s8 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 +; GCN-IR-NEXT: s_min_u32 s14, s8, s9 +; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 +; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s16, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s17, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[16:17], 0 -; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 +; GCN-IR-NEXT: s_add_u32 s12, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s16 +; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s12 ; GCN-IR-NEXT: s_add_u32 s16, s4, -1 ; GCN-IR-NEXT: s_addc_u32 s17, s5, -1 ; GCN-IR-NEXT: s_not_b64 s[6:7], s[10:11] @@ -189,24 +186,30 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow6 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GCN-IR-NEXT: .LBB0_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-IR-NEXT: s_mov_b32 s12, s0 -; GCN-IR-NEXT: s_mul_i32 s0, s4, s9 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s7 +; GCN-IR-NEXT: s_branch .LBB0_6 +; GCN-IR-NEXT: .LBB0_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[12:13] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[12:13] +; GCN-IR-NEXT: .LBB0_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s4, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s4, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 +; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s5, 
s8 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s4, s8 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 -; GCN-IR-NEXT: s_mov_b32 s15, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s14, -1 -; GCN-IR-NEXT: s_mov_b32 s13, s1 +; GCN-IR-NEXT: s_mov_b32 s10, -1 +; GCN-IR-NEXT: s_mov_b32 s8, s0 +; GCN-IR-NEXT: s_mov_b32 s9, s1 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GCN-IR-NEXT: s_endpgm %result = urem i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -750,9 +753,9 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_lo_u32 v2, s0, v1 ; GCN-NEXT: v_mul_hi_u32 v3, s0, v0 ; GCN-NEXT: v_mul_lo_u32 v4, s1, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_mul_lo_u32 v3, s0, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 @@ -778,7 +781,7 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_lo_u32 v1, s7, v0 ; GCN-NEXT: v_mul_hi_u32 v2, s6, v0 ; GCN-NEXT: v_mul_lo_u32 v0, s6, v0 -; GCN-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0, v1 ; GCN-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 ; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc @@ -796,50 +799,47 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 -; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_urem_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 -; GCN-IR-NEXT: s_add_i32 s8, s8, 32 -; GCN-IR-NEXT: s_min_u32 s8, s8, s9 -; GCN-IR-NEXT: s_add_u32 s10, s8, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[12:13], exec -; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] -; GCN-IR-NEXT: s_mov_b32 s7, 0 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; 
GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 +; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s12, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[12:13], 0 -; GCN-IR-NEXT: s_sub_i32 s9, 63, s10 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s9 +; GCN-IR-NEXT: s_add_u32 s10, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s11, s7, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[10:11], 0 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s12 +; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 ; GCN-IR-NEXT: s_add_u32 s14, s2, -1 ; GCN-IR-NEXT: s_addc_u32 s15, s3, -1 ; GCN-IR-NEXT: s_sub_u32 s8, 58, s8 @@ -869,22 +869,27 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_3 ; GCN-IR-NEXT: .LBB6_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] -; GCN-IR-NEXT: .LBB6_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 -; GCN-IR-NEXT: v_mul_hi_u32 v0, s2, v0 -; GCN-IR-NEXT: s_mov_b32 s8, s0 -; GCN-IR-NEXT: s_mul_i32 s0, s2, s7 -; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s3, s6 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s2, s6 -; GCN-IR-NEXT: v_sub_i32_e64 v0, vcc, 24, s0 -; GCN-IR-NEXT: s_mov_b32 s10, -1 -; GCN-IR-NEXT: s_mov_b32 s9, s1 +; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 +; GCN-IR-NEXT: s_branch .LBB6_6 +; GCN-IR-NEXT: .LBB6_5: +; GCN-IR-NEXT: v_mov_b32_e32 v1, 0 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 24, 0, s[10:11] +; GCN-IR-NEXT: .LBB6_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, s2, v1 +; GCN-IR-NEXT: v_mul_hi_u32 v2, s2, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v3, s3, v0 +; GCN-IR-NEXT: v_mul_lo_u32 v0, s2, v0 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 +; GCN-IR-NEXT: s_mov_b32 s6, -1 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, 0, v1, vcc -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %result = urem i64 24, %x store i64 %result, i64 addrspace(1)* %out @@ -897,9 +902,9 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mov_b32_e32 v0, 0x4f800000 ; GCN-NEXT: v_madak_f32 v0, 0, v0, 0x41c00000 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 -; GCN-NEXT: s_movk_i32 s2, 0xffe8 -; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_movk_i32 s4, 0xffe8 +; 
GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 @@ -907,13 +912,13 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_mov_b32 s1, s5 -; GCN-NEXT: v_mul_hi_u32 v2, v0, s2 -; GCN-NEXT: v_mul_lo_u32 v4, v1, s2 -; GCN-NEXT: v_mul_lo_u32 v3, v0, s2 -; GCN-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mul_hi_u32 v2, v0, s4 +; GCN-NEXT: v_mul_lo_u32 v4, v1, s4 +; GCN-NEXT: v_mul_lo_u32 v3, v0, s4 +; GCN-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v3 ; GCN-NEXT: v_mul_lo_u32 v4, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -930,12 +935,12 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GCN-NEXT: v_mul_hi_u32 v2, v0, s2 -; GCN-NEXT: v_mul_lo_u32 v3, v1, s2 -; GCN-NEXT: v_mul_lo_u32 v4, v0, s2 -; GCN-NEXT: s_mov_b32 s2, -1 -; GCN-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GCN-NEXT: v_mul_hi_u32 v2, v0, s4 +; GCN-NEXT: v_mul_lo_u32 v3, v1, s4 +; GCN-NEXT: v_mul_lo_u32 v4, v0, s4 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_subrev_i32_e32 v2, vcc, v0, v2 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_mul_lo_u32 v3, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v5, v0, v4 ; GCN-NEXT: v_mul_hi_u32 v6, v0, v2 @@ -952,15 +957,15 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GCN-NEXT: v_mul_lo_u32 v2, s6, v1 -; GCN-NEXT: v_mul_hi_u32 v3, s6, v0 -; GCN-NEXT: v_mul_hi_u32 v4, s6, v1 -; GCN-NEXT: v_mul_hi_u32 v5, s7, v1 -; GCN-NEXT: v_mul_lo_u32 v1, s7, v1 +; GCN-NEXT: v_mul_lo_u32 v2, s2, v1 +; GCN-NEXT: v_mul_hi_u32 v3, s2, v0 +; GCN-NEXT: v_mul_hi_u32 v4, s2, v1 +; GCN-NEXT: v_mul_hi_u32 v5, s3, v1 +; GCN-NEXT: v_mul_lo_u32 v1, s3, v1 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; GCN-NEXT: v_mul_lo_u32 v4, s7, v0 -; GCN-NEXT: v_mul_hi_u32 v0, s7, v0 +; GCN-NEXT: v_mul_lo_u32 v4, s3, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s3, v0 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc @@ -970,8 +975,8 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_hi_u32 v2, v0, 24 ; GCN-NEXT: v_mul_lo_u32 v0, v0, 24 ; GCN-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GCN-NEXT: v_mov_b32_e32 v2, s7 -; GCN-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 +; GCN-NEXT: v_mov_b32_e32 v2, s3 +; GCN-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc ; GCN-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 ; GCN-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v1, vcc @@ -982,16 +987,16 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GCN-NEXT: v_cndmask_b32_e32 v6, -1, v6, vcc ; GCN-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v6 +; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], 23, v0 ; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[0:1] +; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 23, v0 -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; GCN-IR-LABEL: s_test_urem_k_den_i64: @@ -1002,29 +1007,26 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 ; GCN-IR-NEXT: s_add_i32 s6, s6, 32 ; GCN-IR-NEXT: s_min_u32 s8, s6, s7 -; GCN-IR-NEXT: s_sub_u32 s10, 59, s8 -; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 +; GCN-IR-NEXT: s_sub_u32 s6, 59, s8 +; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 -; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s12, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[12:13], 0 -; GCN-IR-NEXT: s_sub_i32 s9, 63, s10 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] -; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s9 +; GCN-IR-NEXT: s_add_u32 s10, s6, 1 +; GCN-IR-NEXT: s_addc_u32 s11, s7, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[10:11], 0 +; GCN-IR-NEXT: s_sub_i32 s6, 63, s6 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], s[2:3], s12 +; GCN-IR-NEXT: s_lshr_b64 s[10:11], s[2:3], s10 ; GCN-IR-NEXT: s_add_u32 s8, s8, 0xffffffc4 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 @@ -1051,21 +1053,28 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow5 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] -; GCN-IR-NEXT: .LBB7_5: ; %udiv-end -; GCN-IR-NEXT: v_mul_hi_u32 v0, s6, 24 -; GCN-IR-NEXT: s_mov_b32 s8, s0 -; GCN-IR-NEXT: s_mul_i32 s0, s7, 24 +; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; GCN-IR-NEXT: v_mov_b32_e32 v0, s4 +; GCN-IR-NEXT: v_mov_b32_e32 v1, s5 +; GCN-IR-NEXT: s_branch .LBB7_6 +; GCN-IR-NEXT: .LBB7_5: +; GCN-IR-NEXT: v_mov_b32_e32 v0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[10:11] 
+; GCN-IR-NEXT: v_mov_b32_e32 v0, s2 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[10:11] +; GCN-IR-NEXT: .LBB7_6: ; %udiv-end +; GCN-IR-NEXT: v_mul_lo_u32 v1, v1, 24 +; GCN-IR-NEXT: v_mul_hi_u32 v2, v0, 24 +; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, 24 +; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 +; GCN-IR-NEXT: s_mov_b32 s6, -1 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s0, v0 -; GCN-IR-NEXT: s_mul_i32 s0, s6, 24 -; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 -; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: s_mov_b32 s10, -1 -; GCN-IR-NEXT: s_mov_b32 s9, s1 +; GCN-IR-NEXT: s_mov_b32 s4, s0 +; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc -; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-IR-NEXT: s_endpgm %result = urem i64 %x, 24 store i64 %result, i64 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir new file mode 100644 index 0000000000000..7d616ac67bfa5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -0,0 +1,202 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=MUBUF %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog -mattr=+enable-flat-scratch %s -o - | FileCheck --check-prefix=FLATSCR %s + +--- +name: use_restore_frame_reg +tracksRegLiveness: true + +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 2, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 3, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 4, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 5, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 6, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 7, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 8, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 9, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 10, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 11, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 12, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 13, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 14, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 15, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 16, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 17, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 18, type: default, offset: 0, size: 4, alignment: 8192 } + +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + +body: | + ; MUBUF-LABEL: name: use_restore_frame_reg + ; MUBUF: bb.0: + ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; MUBUF-NEXT: liveins: $vgpr1, $vgpr2 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 + ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33 + ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5) + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 9961728 + ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16 + ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc + ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc + ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + ; MUBUF-NEXT: $vgpr0 = 
V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec + ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.1: + ; MUBUF-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-NEXT: liveins: $vgpr2 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: S_NOP 0 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.2: + ; MUBUF-NEXT: liveins: $vgpr2 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc + ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5) + ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -11010048, implicit-def dead $scc + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16 + ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4 + ; MUBUF-NEXT: S_ENDPGM 0 + ; FLATSCR-LABEL: name: use_restore_frame_reg + ; FLATSCR: bb.0: + ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr20_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28_lo16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29_lo16 + ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 + ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5) + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 9961728 + ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc + ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc + ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc + ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 155648, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -155648, implicit-def $scc + ; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.1: + ; FLATSCR-NEXT: successors: %bb.2(0x80000000) + 
; FLATSCR-NEXT: liveins: $vgpr2 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: S_NOP 0 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.2: + ; FLATSCR-NEXT: liveins: $vgpr2 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0 + ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc + ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5) + ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -172032, implicit-def dead $scc + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 + ; FLATSCR-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr1 + + S_CMP_EQ_U32 0, 0, implicit-def $scc + S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec + $vgpr0 = V_OR_B32_e32 %stack.18, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + S_CBRANCH_VCCNZ %bb.2, implicit $vcc + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_NOP 0 + + bb.2: + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir index db54cf60fba44..c277ce444b81a 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir +++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+enable-flat-scratch -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s # A spilled register can be restored to its superclass during regalloc. 
@@ -17,12 +18,18 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_1_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 - ; GCN: {{ $}} - ; GCN: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) - ; GCN: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51_lo16 + ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 @@ -42,14 +49,20 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_2_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 - ; GCN: {{ $}} - ; GCN: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr50 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51_lo16 + ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr50 = PRED_COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 @@ -69,16 +82,22 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 - ; GCN: {{ $}} - ; GCN: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) - ; GCN: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr50 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr49 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51_lo16 + ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; 
GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $vgpr51 = PRED_COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr50 = PRED_COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr49 = PRED_COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 + ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 @@ -98,16 +117,30 @@ body: | ; GCN-LABEL: name: full_spill_a128_restore_to_v128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 - ; GCN: {{ $}} - ; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 - ; GCN: $vgpr55 = COPY $vgpr0, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN: $vgpr54 = COPY $vgpr1, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN: $vgpr53 = COPY $vgpr2, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN: $vgpr52 = COPY $vgpr3, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 - ; GCN: S_ENDPGM 0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr54_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $vgpr55 = PRED_COPY $vgpr0, implicit-def $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr54 = PRED_COPY $vgpr1, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr53 = PRED_COPY $vgpr2, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: $vgpr52 = PRED_COPY $vgpr3, implicit $vgpr52_vgpr53_vgpr54_vgpr55 + ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr52_vgpr53_vgpr54_vgpr55 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 @@ -127,12 +160,18 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $agpr24_agpr25, $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: {{ $}} - ; GCN: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) - ; GCN: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) + 
; GCN-NEXT: $agpr29 = PRED_COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 @@ -152,14 +191,20 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $agpr24_agpr25, $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: {{ $}} - ; GCN: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr28 = COPY $agpr31, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr29 = PRED_COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr28 = PRED_COPY $agpr31, implicit 
$agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 @@ -179,16 +224,22 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_3_of_4 ; GCN: liveins: $agpr24, $agpr25, $agpr30, $agpr31, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: {{ $}} - ; GCN: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) - ; GCN: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr28 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr27 = COPY $agpr31, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5) - ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29_lo16 + ; GCN-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; 
GCN-NEXT: $agpr29 = PRED_COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr28 = PRED_COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr27 = PRED_COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 + ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24 @@ -208,16 +259,30 @@ body: | ; GCN-LABEL: name: full_spill_v128_restore_to_a128 ; GCN: liveins: $agpr4, $agpr5, $agpr6, $agpr7, $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: {{ $}} - ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN: $agpr3 = COPY $agpr4, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN: $agpr2 = COPY $agpr5, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN: $agpr1 = COPY $agpr6, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN: $agpr0 = COPY $agpr7, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN: S_ENDPGM 0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6_lo16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7_lo16 + ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit 
killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: $agpr3 = PRED_COPY $agpr4, implicit-def $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr2 = PRED_COPY $agpr5, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr1 = PRED_COPY $agpr6, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: $agpr0 = PRED_COPY $agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GCN-NEXT: S_ENDPGM 0 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index e029286333a8b..1446c0c1b65ab 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -1667,8 +1667,8 @@ define <6 x half> @shuffle_v6f16_452367(<6 x half> addrspace(1)* %arg0, <6 x hal ; GFX11-NEXT: scratch_load_b128 v[0:3], off, s32 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b96 off, v[4:6], s32 offset:16 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v2, v3 @@ -1771,7 +1771,6 @@ define <4 x half> @shuffle_v4f16_0456(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: ; kill: killed $vgpr0 killed $vgpr1 @@ -1786,7 +1785,6 @@ define <4 x half> @shuffle_v4f16_0456(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[4:5], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[2:3], off ; GFX10-NEXT: ; kill: killed $vgpr0 killed $vgpr1 ; GFX10-NEXT: ; kill: killed $vgpr2 killed $vgpr3 @@ -1800,7 +1798,6 @@ define <4 x half> @shuffle_v4f16_0456(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b64 v[1:2], v[2:3], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 diff --git a/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir b/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir index 07f4067ad0b19..ca6fa25d8c919 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir @@ -19,9 +19,9 @@ body: | %2:sgpr_256 = IMPLICIT_DEF %3:sgpr_128 = IMPLICIT_DEF - %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a %0.sub1, %2, 8, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") - IMAGE_STORE_V1_V1_gfx90a $vgpr1, %5, %2, 2, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 
(s32) into custom "ImageResource") - %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32, $vgpr1, %2, 1, -1, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") + %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) + %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a %0.sub1, %2, 8, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7) + IMAGE_STORE_V1_V1_gfx90a $vgpr1, %5, %2, 2, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), addrspace 7) + %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32, $vgpr1, %2, 1, -1, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ... diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index 5cb44fa6f1045..10ce5d51e3c37 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -8,9 +8,9 @@ define amdgpu_ps float @else1(i32 %z, float %v) #0 { ; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY1]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} @@ -18,7 +18,7 @@ define amdgpu_ps float @else1(i32 %z, float %v) #0 { ; SI-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %13:vgpr_32, %bb.0, %4, %bb.3 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, undef %15:vgpr_32, %bb.3 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, undef %15:vgpr_32, %bb.3 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -31,13 +31,13 @@ define amdgpu_ps float @else1(i32 %z, float %v) #0 { ; SI-NEXT: bb.3.else: ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, killed [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.end: ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %3, %bb.2 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[PHI2]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI2]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -64,9 +64,9 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 { ; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; 
SI-NEXT: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY1]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.3 ; SI-NEXT: {{ $}} @@ -74,20 +74,20 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 { ; SI-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, %5, %bb.3 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, [[COPY]], %bb.3 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, [[PRED_COPY]], %bb.3 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.2.if: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: %4:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PRED_COPY]], 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.4 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.3.else: ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1077936128, 0, [[PRED_COPY]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.end: @@ -95,7 +95,7 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 { ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %4, %bb.2 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: %15:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI2]], 0, killed [[PHI3]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed %15 + ; SI-NEXT: $vgpr0 = PRED_COPY killed %15 ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -123,18 +123,18 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $vgpr2 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY killed $sgpr0 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:sgpr_32 = PRED_COPY killed $sgpr0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY3]], implicit $exec ; SI-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.for.body: ; SI-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %14, %bb.5 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %13, %bb.5 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY]], %bb.0, %13, %bb.5 ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.4 ; SI-NEXT: {{ $}} @@ -150,14 +150,14 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: bb.3.if: ; SI-NEXT: successors: %bb.5(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PHI]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: %7:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PHI]], 0, [[PRED_COPY2]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, killed [[PHI4]], 0, implicit $exec ; SI-NEXT: S_BRANCH %bb.5 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4.else: ; SI-NEXT: successors: %bb.2(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY2]], 0, [[PHI1]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: %9:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[PRED_COPY2]], 0, [[PHI1]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[PHI1]], 3, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -169,13 +169,13 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[PHI6]], 0, implicit $exec ; SI-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[PHI]], 1, implicit-def dead $scc - ; SI-NEXT: S_CMP_LT_I32 [[S_ADD_I32_]], [[COPY1]], implicit-def $scc + ; SI-NEXT: S_CMP_LT_I32 [[S_ADD_I32_]], [[PRED_COPY1]], implicit-def $scc ; SI-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.for.end: ; SI-NEXT: %31:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI6]], 0, killed [[PHI5]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed %31 + ; SI-NEXT: $vgpr0 = PRED_COPY killed %31 ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 entry: ; %break = icmp sgt i32 %bound, 0 @@ -222,13 +222,13 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr5 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr4 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; 
SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY5]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} @@ -236,9 +236,9 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %49:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %51:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %53:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %55:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY4]], %bb.0, undef %51:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY3]], %bb.0, undef %53:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY2]], %bb.0, undef %55:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -263,12 +263,12 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI5]] + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY6]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI5]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} @@ -276,20 +276,20 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.10(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]] - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] ; SI-NEXT: S_BRANCH %bb.10 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.else: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 + ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; 
SI-NEXT: {{ $}} ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %61:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 - ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[COPY4]], %bb.6 + ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[PRED_COPY4]], %bb.6 ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 @@ -300,12 +300,12 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI7]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY9]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI7]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} @@ -313,13 +313,13 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]] - ; SI-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]] + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY10]] ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[PRED_COPY8]], %bb.5 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed [[PHI8]] + ; SI-NEXT: $vgpr0 = PRED_COPY killed [[PHI8]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -345,13 +345,13 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) ; SI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4 - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr5 + ; 
SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr4 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr2 + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[PRED_COPY5]], implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.6 ; SI-NEXT: {{ $}} @@ -359,8 +359,8 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %50:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -384,12 +384,12 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] - ; SI-NEXT: $vgpr0 = COPY [[COPY4]] + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY6]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} @@ -397,13 +397,13 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.10(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]] - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]] + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY7]] ; SI-NEXT: S_BRANCH %bb.10 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6.else: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1 + ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]], %subreg.sub0, killed [[PRED_COPY]], %subreg.sub1 ; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: @@ -420,12 +420,12 @@ 
define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] - ; SI-NEXT: $vgpr0 = COPY [[COPY4]] + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:sgpr_128 = PRED_COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = PRED_COPY killed [[PRED_COPY9]] + ; SI-NEXT: $vgpr0 = PRED_COPY [[PRED_COPY4]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI-NEXT: [[PRED_COPY10:%[0-9]+]]:vgpr_32 = PRED_COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} @@ -433,14 +433,14 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]] - ; SI-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]] + ; SI-NEXT: [[PRED_COPY11:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[PRED_COPY10]] ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[PRED_COPY8]], %bb.5 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: %27:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI5]], 0, killed [[COPY4]], 0, 0, implicit $mode, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed %27 + ; SI-NEXT: %27:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI5]], 0, killed [[PRED_COPY4]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: $vgpr0 = PRED_COPY killed %27 ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -466,16 +466,16 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %s ; SI-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 - ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]](s32), implicit $exec + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY killed $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[PRED_COPY1]](s32), implicit $exec ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.if.then: ; SI-NEXT: successors: %bb.7(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4) + ; SI-NEXT: 
[[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[PRED_COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4) ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %51, 0, implicit $exec ; SI-NEXT: %44:vgpr_32, dead %46:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %44, %subreg.sub1 @@ -504,7 +504,7 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %s ; SI-NEXT: bb.5.Flow: ; SI-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %52:vgpr_32, %bb.6 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[PRED_COPY1]](s32), %bb.0, undef %52:vgpr_32, %bb.6 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} @@ -566,11 +566,11 @@ define protected amdgpu_kernel void @nested_waterfalls(%tex* addrspace(1)* %tex. ; SI-NEXT: successors: %bb.1(0x80000000) ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1 - ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 - ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset.cast, align 4, addrspace 4) + ; SI-NEXT: [[PRED_COPY:%[0-9]+]]:sgpr_64(p4) = PRED_COPY killed $sgpr0_sgpr1 + ; SI-NEXT: [[PRED_COPY1:%[0-9]+]]:vgpr_32(s32) = PRED_COPY killed $vgpr0 + ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[PRED_COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.tex.coerce.kernarg.offset.cast, align 4, addrspace 4) ; SI-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]](s32), %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 + ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PRED_COPY1]](s32), %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.1.if.then: ; SI-NEXT: successors: %bb.2(0x80000000) @@ -581,16 +581,16 @@ define protected amdgpu_kernel void @nested_waterfalls(%tex* addrspace(1)* %tex. 
; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %85, %subreg.sub1 ; SI-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s64) from %ir.idx, addrspace 1) ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 16, 0, implicit $exec :: (invariant load (s128) from %ir.6 + 16, addrspace 4) - ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub3 - ; SI-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub2 - ; SI-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub1 - ; SI-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub0 + ; SI-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub3 + ; SI-NEXT: [[PRED_COPY3:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub2 + ; SI-NEXT: [[PRED_COPY4:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_]].sub1 + ; SI-NEXT: [[PRED_COPY5:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_]].sub0 ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[GLOBAL_LOAD_DWORDX2_]], 0, 0, implicit $exec :: (invariant load (s128) from %ir.6, align 32, addrspace 4) - ; SI-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub3 - ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub2 - ; SI-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_1]].sub1 - ; SI-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[GLOBAL_LOAD_DWORDX4_1]].sub0 - ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3, killed [[COPY5]], %subreg.sub4, killed [[COPY4]], %subreg.sub5, killed [[COPY3]], %subreg.sub6, killed [[COPY2]], %subreg.sub7 + ; SI-NEXT: [[PRED_COPY6:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub3 + ; SI-NEXT: [[PRED_COPY7:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub2 + ; SI-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY [[GLOBAL_LOAD_DWORDX4_1]].sub1 + ; SI-NEXT: [[PRED_COPY9:%[0-9]+]]:vgpr_32 = PRED_COPY killed [[GLOBAL_LOAD_DWORDX4_1]].sub0 + ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_256 = REG_SEQUENCE killed [[PRED_COPY9]], %subreg.sub0, killed [[PRED_COPY8]], %subreg.sub1, killed [[PRED_COPY7]], %subreg.sub2, killed [[PRED_COPY6]], %subreg.sub3, killed [[PRED_COPY5]], %subreg.sub4, killed [[PRED_COPY4]], %subreg.sub5, killed [[PRED_COPY3]], %subreg.sub6, killed [[PRED_COPY2]], %subreg.sub7 ; SI-NEXT: [[GLOBAL_LOAD_DWORDX4_2:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 killed [[GLOBAL_LOAD_DWORDX2_]], 48, 0, implicit $exec :: (invariant load (s128) from %ir.8, addrspace 4) ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; SI-NEXT: {{ $}} @@ -642,7 +642,7 @@ define protected amdgpu_kernel void @nested_waterfalls(%tex* addrspace(1)* %tex. 
; SI-NEXT: bb.5: ; SI-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[IMAGE_SAMPLE_V1_V2_gfx10_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V2_gfx10 undef %27:vreg_64, [[REG_SEQUENCE7]], killed [[REG_SEQUENCE10]], 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") + ; SI-NEXT: [[IMAGE_SAMPLE_V1_V2_gfx10_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V2_gfx10 undef %27:vreg_64, [[REG_SEQUENCE7]], killed [[REG_SEQUENCE10]], 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7) ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc ; SI-NEXT: SI_WATERFALL_LOOP %bb.4, implicit $exec ; SI-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir index 16a209dfe48ac..8f52af12d353f 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-def-heterogeneous-dwarf.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-vgpr-to-agpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-vgpr-to-agpr=true -verify-machineinstrs -start-before=si-lower-sgpr-spills -stop-after=prologepilog -o - %s | FileCheck %s # After handling the VGPR spill to AGPR copy in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0. # Otherwise, the test would crash during PEI while trying to replace the dead frame index. 
@@ -41,11 +42,19 @@ machineFunctionInfo: body: | ; CHECK-LABEL: name: test ; CHECK: bb.0: - ; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec - ; CHECK: DBG_DEF <{{.*}}>, $noreg - ; CHECK: bb.1: - ; CHECK: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x03, 0x30, 0x36, 0xe1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 bb.0: $vgpr2 = IMPLICIT_DEF SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll index d294add6fee5b..7e235b3bce0eb 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -2,8 +2,8 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register ; scavenging which will fail to find an unused register.
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir index 7dc41db36df53..6e8f5cfa58c21 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -45,7 +45,7 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -65,7 +65,7 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -150,7 +150,7 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -170,7 +170,7 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -253,7 +253,7 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc @@ -273,7 +273,7 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} - ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc @@ -358,7 +358,7 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -379,7 +379,7 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined @@ -445,7 +445,7 @@ body: | ; MUBUF-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -497,9 +497,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -550,9 +550,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -602,7 +602,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -681,7 +681,7 @@ body: | ; MUBUF-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -735,9 +735,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: 
%bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -789,9 +789,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined @@ -842,7 +842,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -923,7 +923,7 @@ body: | ; MUBUF-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -979,9 +979,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1_lo16 @@ -1034,9 +1034,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined @@ -1088,7 +1088,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1171,7 +1171,7 @@ body: | ; MUBUF-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1222,9 +1222,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1274,9 +1274,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1325,7 +1325,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1403,7 +1403,7 @@ body: | ; MUBUF-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1455,9 +1455,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1507,9 +1507,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1558,7 +1558,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1637,7 +1637,7 @@ body: | ; MUBUF-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1690,9 +1690,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: 
{{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1742,9 +1742,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16 @@ -1793,7 +1793,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -1923,7 +1923,7 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -1973,7 +1973,7 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 
0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -2101,7 +2101,7 @@ body: | ; MUBUF-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -2153,9 +2153,9 @@ body: | ; GFX9-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -2206,9 +2206,9 @@ body: | ; GFX10-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16 @@ -2259,7 +2259,7 @@ body: | ; VMEM-GFX8-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 @@ -2362,7 +2362,7 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc @@ -2383,7 +2383,7 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} - ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe1 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index bf8e386248bb9..d3dc8a919b8aa 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -11,11 +11,12 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-LABEL: non_preserved_vgpr_tuple8: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill @@ -53,11 +54,12 @@ define 
<4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -65,12 +67,13 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s5, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v40, s4, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill @@ -110,12 +113,13 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s4, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s5, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 +; GFX10-NEXT: s_mov_b32 s33, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -123,11 +127,12 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:16 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12 @@ -166,11 +171,12 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> 
%rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:12 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:16 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -201,11 +207,12 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-LABEL: call_preserved_vgpr_tuple8: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill @@ -237,11 +244,12 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -249,12 +257,13 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_or_saveexec_b32 s5, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v40, s4, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill @@ -287,12 +296,13 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: 
buffer_load_dword v41, off, s[0:3], s33 offset:16 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 -; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX10-NEXT: v_readlane_b32 s4, v40, 2 +; GFX10-NEXT: s_or_saveexec_b32 s5, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 +; GFX10-NEXT: s_mov_b32 s33, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -300,11 +310,12 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:20 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v40, s33, 2 +; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:20 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:16 @@ -339,11 +350,12 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:16 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-NEXT: v_readlane_b32 s0, v40, 2 +; GFX11-NEXT: s_or_saveexec_b32 s1, -1 +; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:20 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_addk_i32 s32, 0xffe0 -; GFX11-NEXT: v_readlane_b32 s33, v40, 2 -; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:20 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/vselect.ll b/llvm/test/CodeGen/AMDGPU/vselect.ll index 9b1b552b82a0f..6e7bdb107a979 100644 --- a/llvm/test/CodeGen/AMDGPU/vselect.ll +++ b/llvm/test/CodeGen/AMDGPU/vselect.ll @@ -13,9 +13,9 @@ ; VI: s_cselect_b32 ; SI-DAG: s_cmp_gt_i32 -; SI-DAG: s_cselect_b32 +; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: s_cmp_gt_i32 -; SI-DAG: s_cselect_b32 +; SI-DAG: v_cndmask_b32_e32 define amdgpu_kernel void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1, <2 x i32> %val) { entry: @@ -59,10 +59,10 @@ entry: ; VI: s_cselect_b32 ; VI: s_cselect_b32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 -; SI-DAG: s_cselect_b32 +; SI-DAG: v_cndmask_b32_e64 +; SI-DAG: v_cndmask_b32_e64 +; SI-DAG: v_cndmask_b32_e64 +; SI-DAG: v_cndmask_b32_e32 define amdgpu_kernel void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1, <4 x i32> %val) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir index 74a22bf4d766e..08577d9939726 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir +++ 
b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir @@ -8,11 +8,11 @@ body: | ; GCN-LABEL: name: waitcnt-check-inorder ; GCN: S_WAITCNT 0 ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... 
--- @@ -22,11 +22,11 @@ body: | ; GCN-LABEL: name: waitcnt-check-vs-vmem ; GCN: S_WAITCNT 0 ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -37,11 +37,11 @@ body: | ; GCN-LABEL: name: waitcnt-check-vs-mimg-samp ; GCN: S_WAITCNT 0 ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_ENDPGM 0 ... 
@@ -54,10 +54,10 @@ body: | ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_WAITCNT 16240 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... --- @@ -69,9 +69,9 @@ body: | ; GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) ; GCN-NEXT: S_WAITCNT 16240 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; GCN-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir index d8555a1f15770..0892687625748 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s # Check that the waitcnt pass does *not* insert a redundant waitcnt instr. @@ -35,6 +36,6 @@ name: waitcnt-no-war-wait body: | bb.0: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr0_sgpr1_sgpr2_sgpr3, 276, 0 :: (dereferenceable invariant load (s32)) - TBUFFER_STORE_FORMAT_X_OFFEN_exact killed renamable $vgpr0, renamable $vgpr15, renamable $sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr9, 0, 116, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFEN_exact killed renamable $vgpr0, renamable $vgpr15, renamable $sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr9, 0, 116, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7) ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir index 0c433dcb59bbd..1365ff559f3e8 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -34,15 +34,17 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-LABEL: name: test_waitcnt_preexisting_vscnt_needs_vscnt - ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1 - ; GFX10: S_BARRIER - ; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GFX10: S_WAITCNT 112 - ; GFX10: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX10: S_ENDPGM 0 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: S_WAITCNT 0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: S_BARRIER + ; GFX10-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-NEXT: S_WAITCNT 112 + ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 1 S_BARRIER @@ -114,16 +116,18 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-LABEL: name: test_waitcnt_preexisting_vscnt_combined_both_types - ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1 - ; GFX10: S_BARRIER - ; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GFX10: S_WAITCNT 112 - ; GFX10: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX10: S_ENDPGM 0 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: S_WAITCNT 0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec + ; GFX10-NEXT: S_WAITCNT 
0 + ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10-NEXT: S_BARRIER + ; GFX10-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-NEXT: S_WAITCNT 112 + ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT 0 S_WAITCNT_VSCNT undef $sgpr_null, 1 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir index e0d5110a7775a..163fcf0834dc4 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir @@ -72,3 +72,56 @@ body: | $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s128)) ... +# (global_load + scratch_load + buffer_load) +--- +name: global_scratch_buffer +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-LABEL: name: global_scratch_buffer + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec +... +# waw between flat and buffer should have a wait inserted between. +# (flat + buffer) +--- +name: flat_buffer +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-LABEL: name: flat_buffer + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec +... +# buffer + flat +--- +name: buffer_flat +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-LABEL: name: buffer_flat + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3952 + ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr +... 
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll index e78b5355c2d5d..f1c5c5b0ee65e 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx802 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_10,GFX8_9 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s ; GCN-LABEL: barrier_vmcnt_global: ; GFX8: flat_load_dword @@ -42,7 +42,7 @@ bb: %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4 store i32 0, i32 addrspace(1)* %tmp5, align 4 fence syncscope("singlethread") release - tail call void @llvm.amdgcn.s.barrier() + tail call void @llvm.amdgcn.s.barrier() #3 fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 @@ -116,7 +116,7 @@ bb: %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4 store i32 0, i32* %tmp5, align 4 fence syncscope("singlethread") release - tail call void @llvm.amdgcn.s.barrier() + tail call void @llvm.amdgcn.s.barrier() #3 fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index f7f804dc50b19..8780e8ba0e836 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1108,16 +1108,16 @@ declare void @external_void_func_void() #1 ; GCN-LABEL: {{^}}callee_no_stack_with_call: ; GCN: s_waitcnt ; GCN-NEXT: s_waitcnt_vscnt - +; GCN-NEXT: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]+]], -1{{$}} -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v40, s33, 2 -; GCN: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2 ; GCN-DAG: v_writelane_b32 v40, s30, 0 @@ -1129,15 +1129,16 @@ declare void @external_void_func_void() #1 ; GCN-DAG: v_readlane_b32 s31, v40, 1 -; GFX1064: s_addk_i32 s32, 0xfc00 -; GFX1032: s_addk_i32 s32, 0xfe00 -; GCN: v_readlane_b32 s33, v40, 2 +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]] +; GFX1064: s_addk_i32 s32, 0xfc00 +; GFX1032: s_addk_i32 s32, 0xfe00 +; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @callee_no_stack_with_call() #1 { diff --git 
a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll new file mode 100644 index 0000000000000..325559abcdd02 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s + +; The test forces a high vector register pressure and there won't be sufficient VGPRs to be allocated +; for writelane/readlane SGPR spill instructions. Regalloc would split the vector register liverange +; by introducing a copy to AGPR register. The VGPR store to AGPR (v_accvgpr_write_b32) and later the +; restore from AGPR (v_accvgpr_read_b32) should be whole-wave operations and hence exec mask should be +; manipulated to ensure all lanes are active when these instructions are executed. +define void @vector_reg_liverange_split() #0 { +; GFX90A-LABEL: vector_reg_liverange_split: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_mov_b32 s16, s33 +; GFX90A-NEXT: s_mov_b32 s33, s32 +; GFX90A-NEXT: s_xor_saveexec_b64 s[18:19], -1 +; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-NEXT: s_mov_b64 exec, -1 +; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-NEXT: ; implicit-def: $vgpr0 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s20 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_writelane_b32 v0, s20, 0 +; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 +; GFX90A-NEXT: s_mov_b64 exec, s[28:29] +; GFX90A-NEXT: s_getpc_b64 s[16:17] +; GFX90A-NEXT: s_add_u32 s16, s16, foo@gotpcrel32@lo+4 +; GFX90A-NEXT: s_addc_u32 s17, s17, foo@gotpcrel32@hi+12 +; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 +; GFX90A-NEXT: s_mov_b64 exec, s[28:29] +; GFX90A-NEXT: v_readlane_b32 s20, v0, 0 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s20 +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: buffer_load_dword a32, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-NEXT: ; kill: killed $vgpr0 +; GFX90A-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-NEXT: s_mov_b64 exec, -1 +; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-NEXT: s_addk_i32 s32, 0xfc00 +; GFX90A-NEXT: s_mov_b32 s33, s4 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] + %s20 = call i32 asm sideeffect "; def $0","=${s20}"() + call void @foo() + call void asm sideeffect "; use $0","${s20}"(i32 %s20) + ret void +} + +declare void @foo() + +attributes #0 = { "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34"} diff --git 
a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll new file mode 100644 index 0000000000000..8b882ca4541a8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN-O0 %s + +; Test whole-wave register spilling. + +; In this testcase, the return address registers, PC value (SGPR30_SGPR31) and the scratch SGPR used in +; the inline asm statements should be preserved across the call. Since the test limits the VGPR numbers, +; the PC will be spilled to the only available CSR VGPR (VGPR40) as we spill CSR SGPRs including the PC +; directly to the physical VGPR lane to correctly generate the CFIs. The SGPR20 will get spilled to the +; virtual VGPR lane and that would be allocated by regalloc. Since there is no free VGPR to allocate, RA +; must spill a scratch VGPR. The writelane/readlane instructions that spill/restore SGPRs into/from VGPR +; are whole-wave operations and hence the VGPRs involved in such operations require whole-wave spilling. + +define void @test() #0 { +; GCN-LABEL: test: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s16, s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_xor_saveexec_b64 s[18:19], -1 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, -1 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[18:19] +; GCN-NEXT: v_writelane_b32 v40, s16, 4 +; GCN-NEXT: v_writelane_b32 v40, s28, 2 +; GCN-NEXT: v_writelane_b32 v40, s29, 3 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x800 +; GCN-NEXT: ; implicit-def: $vgpr0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s16 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: s_getpc_b64 s[16:17] +; GCN-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s17, s17, ext_func@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[28:29] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s4, v1, 0 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: ; kill: killed $vgpr1 +; GCN-NEXT: v_readlane_b32 s4, v40, 4 +; GCN-NEXT: v_readlane_b32 s28, v40, 2 +; GCN-NEXT: v_readlane_b32 s29, v40, 3 +; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: 
s_mov_b64 exec, -1 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_addk_i32 s32, 0xf800 +; GCN-NEXT: s_mov_b32 s33, s4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GCN-O0-LABEL: test: +; GCN-O0: ; %bb.0: +; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-O0-NEXT: s_mov_b32 s16, s33 +; GCN-O0-NEXT: s_mov_b32 s33, s32 +; GCN-O0-NEXT: s_xor_saveexec_b64 s[18:19], -1 +; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_mov_b64 exec, -1 +; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_mov_b64 exec, s[18:19] +; GCN-O0-NEXT: v_writelane_b32 v40, s16, 4 +; GCN-O0-NEXT: v_writelane_b32 v40, s28, 2 +; GCN-O0-NEXT: v_writelane_b32 v40, s29, 3 +; GCN-O0-NEXT: s_add_i32 s32, s32, 0x400 +; GCN-O0-NEXT: ; implicit-def: $vgpr0 +; GCN-O0-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-O0-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-O0-NEXT: ;;#ASMSTART +; GCN-O0-NEXT: ; def s16 +; GCN-O0-NEXT: ;;#ASMEND +; GCN-O0-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_mov_b64 exec, s[28:29] +; GCN-O0-NEXT: s_getpc_b64 s[16:17] +; GCN-O0-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4 +; GCN-O0-NEXT: s_addc_u32 s17, s17, ext_func@gotpcrel32@hi+12 +; GCN-O0-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; GCN-O0-NEXT: s_mov_b64 s[22:23], s[2:3] +; GCN-O0-NEXT: s_mov_b64 s[20:21], s[0:1] +; GCN-O0-NEXT: s_mov_b64 s[0:1], s[20:21] +; GCN-O0-NEXT: s_mov_b64 s[2:3], s[22:23] +; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) +; GCN-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[28:29] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0 +; GCN-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 +; GCN-O0-NEXT: v_mov_b32_e32 v1, s7 +; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 +; GCN-O0-NEXT: global_store_dword v[0:1], v2, off +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[28:29] +; GCN-O0-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-O0-NEXT: ; kill: killed $vgpr0 +; GCN-O0-NEXT: v_readlane_b32 s4, v40, 4 +; GCN-O0-NEXT: v_readlane_b32 s28, v40, 2 +; GCN-O0-NEXT: v_readlane_b32 s29, v40, 3 +; GCN-O0-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, -1 +; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] +; GCN-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; GCN-O0-NEXT: s_mov_b32 s33, s4 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_setpc_b64 s[30:31] + %sgpr = call i32 asm sideeffect "; def $0", "=s" () #0 + call void @ext_func() + store volatile i32 %sgpr, ptr addrspace(1) undef + ret void +} + +declare void @ext_func(); + +attributes #0 = { nounwind "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34"} diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 2167a5ab8f42d..3ca8d8c79b4b1 100644 --- 
a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -204,7 +204,6 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -219,7 +218,6 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -243,7 +241,6 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -258,7 +255,6 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -496,7 +492,6 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -518,7 +513,6 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -962,7 +956,6 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -986,7 +979,6 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -1176,7 +1168,6 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_nop 0 ; GFX9-W64-NEXT: buffer_load_dword v2, v2, s[0:3], 0 idxen ; GFX9-W64-NEXT: ; kill: def 
$vgpr1 killed $vgpr1 def $scc killed $exec -; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_u32_e32 v1, v2, v1 @@ -1193,7 +1184,6 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v2, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $scc killed $exec -; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_nc_u32_e32 v1, v1, v2 @@ -2500,7 +2490,6 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -2522,7 +2511,6 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir index 4d0b9320f7be4..fbf0881dce0ca 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -79,10 +79,10 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 - %3 = COPY $vgpr0 - %2 = COPY $sgpr2 - %1 = COPY $sgpr1 - %0 = COPY $sgpr0 + %3 = PRED_COPY $vgpr0 + %2 = PRED_COPY $sgpr2 + %1 = PRED_COPY $sgpr1 + %0 = PRED_COPY $sgpr0 S_CMP_LT_I32 0, %0, implicit-def $scc %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec %5 = S_CSELECT_B32 %2, %1, implicit $scc @@ -98,9 +98,9 @@ body: | # #CHECK: %bb.1 #CHECK: S_CMP_LT_I32 -#CHECK: COPY $scc +#CHECK: PRED_COPY $scc #CHECK: ENTER_STRICT_WWM -#CHECK: $scc = COPY +#CHECK: $scc = PRED_COPY #CHECK: S_CSELECT_B32 name: test_strict_wwm_scc2 tracksRegLiveness: true @@ -108,10 +108,10 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 - %3:vgpr_32 = COPY $vgpr0 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:vgpr_32 = PRED_COPY $vgpr0 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %13:sgpr_128 = IMPLICIT_DEF bb.1: @@ -121,14 +121,14 @@ body: | %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec $vgpr0 = STRICT_WWM %11:vgpr_32, implicit $exec - $vgpr1 = COPY %10:vgpr_32 + $vgpr1 = PRED_COPY %10:vgpr_32 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 ... --- # V_SET_INACTIVE, when its second operand is undef, is replaced by a -# COPY by si-wqm. Ensure the instruction is removed. +# PRED_COPY by si-wqm. Ensure the instruction is removed. 
#CHECK-NOT: V_SET_INACTIVE name: no_cfg alignment: 1 @@ -166,18 +166,18 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %5:sgpr_128 = COPY %6 + %5:sgpr_128 = PRED_COPY %6 %7:sreg_32 = S_MOV_B32 0 %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, implicit $exec - %16:vgpr_32 = COPY %8.sub1 - %11:vgpr_32 = COPY %16 + %16:vgpr_32 = PRED_COPY %8.sub1 + %11:vgpr_32 = PRED_COPY %16 %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc - %14:vgpr_32 = COPY %7 + %14:vgpr_32 = PRED_COPY %7 %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, 0, implicit $exec @@ -188,7 +188,7 @@ body: | --- # Ensure that strict_wwm is not put around an EXEC copy #CHECK-LABEL: name: copy_exec -#CHECK: %7:sreg_64 = COPY $exec +#CHECK: %7:sreg_64 = PRED_COPY $exec #CHECK-NEXT: %14:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec #CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec #CHECK-NEXT: $exec = EXIT_STRICT_WWM %14 @@ -199,22 +199,22 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - %3:sgpr_32 = COPY $sgpr3 - %2:sgpr_32 = COPY $sgpr2 - %1:sgpr_32 = COPY $sgpr1 - %0:sgpr_32 = COPY $sgpr0 + %3:sgpr_32 = PRED_COPY $sgpr3 + %2:sgpr_32 = PRED_COPY $sgpr2 + %1:sgpr_32 = PRED_COPY $sgpr1 + %0:sgpr_32 = PRED_COPY $sgpr0 %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %5:sreg_32 = S_MOV_B32 0 %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, implicit $exec - %8:sreg_64 = COPY $exec + %8:sreg_64 = PRED_COPY $exec %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63 early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec - %14:vgpr_32 = COPY %13 + %14:vgpr_32 = PRED_COPY %13 BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -228,9 +228,9 @@ body: | #CHECK-NEXT: S_CMP_EQ_U32 %2, 0, implicit-def $scc #CHECK-NEXT: undef %9.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 #CHECK-NEXT: %9.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 -#CHECK-NEXT: %14:sreg_32_xm0 = COPY $scc +#CHECK-NEXT: %14:sreg_32_xm0 = PRED_COPY $scc #CHECK-NEXT: $exec = S_AND_B64 $exec, %13, implicit-def $scc -#CHECK-NEXT: $scc = COPY %14 +#CHECK-NEXT: $scc = PRED_COPY %14 #CHECK-NEXT: %10:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 #CHECK-NEXT: %11:vreg_128 = IMAGE_SAMPLE_V4_V2 #CHECK-NEXT: S_CBRANCH_SCC0 %bb.2 @@ -240,18 +240,18 @@ body: | bb.0: liveins: $sgpr1, $sgpr2, $vgpr1, $vgpr2 - $m0 = COPY $sgpr1 - %0:vgpr_32 = COPY $vgpr1 - %1:vgpr_32 = COPY $vgpr2 - %8:sgpr_32 = COPY $sgpr2 + $m0 = PRED_COPY $sgpr1 + %0:vgpr_32 = PRED_COPY $vgpr1 + %1:vgpr_32 = PRED_COPY $vgpr2 + %8:sgpr_32 = PRED_COPY $sgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF %2:vgpr_32 = V_INTERP_P1_F32 %0:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec %3:vgpr_32 = 
V_INTERP_P1_F32 %1:vgpr_32, 3, 2, implicit $mode, implicit $m0, implicit $exec - undef %7.sub0:vreg_64 = COPY %2:vgpr_32 - %7.sub1:vreg_64 = COPY %3:vgpr_32 + undef %7.sub0:vreg_64 = PRED_COPY %2:vgpr_32 + %7.sub1:vreg_64 = PRED_COPY %3:vgpr_32 %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc @@ -270,10 +270,10 @@ body: | S_ENDPGM 0 bb.2: - $vgpr0 = COPY %4.sub0:vreg_128 - $vgpr1 = COPY %4.sub1:vreg_128 - $vgpr2 = COPY %9.sub0:vreg_128 - $vgpr3 = COPY %9.sub1:vreg_128 + $vgpr0 = PRED_COPY %4.sub0:vreg_128 + $vgpr1 = PRED_COPY %4.sub1:vreg_128 + $vgpr2 = PRED_COPY %9.sub0:vreg_128 + $vgpr3 = PRED_COPY %9.sub1:vreg_128 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 ... @@ -292,8 +292,8 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 - %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:vgpr_32 = COPY $vgpr0 + %0:sgpr_128 = PRED_COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:vgpr_32 = PRED_COPY $vgpr0 %2:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN %1:vgpr_32, %0:sgpr_128, 0, 0, 0, 0, 0, implicit $exec %2.sub0:vreg_64 = V_SET_INACTIVE_B32 %2.sub0:vreg_64, 0, implicit $exec, implicit-def $scc %2.sub1:vreg_64 = V_SET_INACTIVE_B32 %2.sub1:vreg_64, 0, implicit $exec, implicit-def $scc @@ -308,7 +308,7 @@ body: | # If not then initial V_MOV will not be in WQM. # #CHECK-LABEL: name: test_wqm_lr_phi -#CHECK: COPY $exec +#CHECK: PRED_COPY $exec #CHECK-NEXT: S_WQM #CHECK-NEXT: V_MOV_B32_e32 -10 #CHECK-NEXT: V_MOV_B32_e32 0 @@ -336,9 +336,9 @@ body: | bb.4: %3:sgpr_128 = IMPLICIT_DEF - %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") - $vgpr0 = COPY %4.sub0:vreg_128 - $vgpr1 = COPY %4.sub1:vreg_128 + %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) + $vgpr0 = PRED_COPY %4.sub0:vreg_128 + $vgpr1 = PRED_COPY %4.sub1:vreg_128 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 ... 
@@ -351,8 +351,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -368,8 +368,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -385,8 +385,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -402,8 +402,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -419,8 +419,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF @@ -436,8 +436,8 @@ body: | bb.0: liveins: $vgpr1, $vgpr2 - undef %0.sub0:vreg_64 = COPY $vgpr1 - %0.sub1:vreg_64 = COPY $vgpr2 + undef %0.sub0:vreg_64 = PRED_COPY $vgpr1 + %0.sub1:vreg_64 = PRED_COPY $vgpr2 %100:sgpr_256 = IMPLICIT_DEF %101:sgpr_128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index f94c00b113499..7b94e9ba3c4b8 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -6,7 +6,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O0-LABEL: strict_wwm_no_cfg: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill @@ -51,7 +51,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O0-NEXT: s_mov_b32 s35, 2 ; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -62,7 +62,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O3-LABEL: strict_wwm_no_cfg: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill @@ -95,7 +95,7 @@ define 
amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4 ; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4 ; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -132,11 +132,13 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-LABEL: strict_wwm_cfg: ; GFX9-O0: ; %bb.0: ; %entry ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: ; implicit-def: $vgpr3 ; GFX9-O0-NEXT: s_mov_b32 s36, s4 ; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 ; GFX9-O0-NEXT: s_mov_b32 s37, s5 @@ -144,10 +146,13 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s39, s7 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] -; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 -; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 -; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2 -; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3 +; GFX9-O0-NEXT: v_writelane_b32 v3, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v3, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v3, s42, 2 +; GFX9-O0-NEXT: v_writelane_b32 v3, s43, 3 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[44:45], -1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[44:45] ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) @@ -170,9 +175,16 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_or_saveexec_b64 s[44:45], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[44:45] ; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec -; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4 -; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[44:45], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[44:45] ; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] ; GFX9-O0-NEXT: 
s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 @@ -196,13 +208,17 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4 -; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[44:45], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[44:45] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s34, v0, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0 -; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1 -; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2 -; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s36, v0, 0 +; GFX9-O0-NEXT: v_readlane_b32 s37, v0, 1 +; GFX9-O0-NEXT: v_readlane_b32 s38, v0, 2 +; GFX9-O0-NEXT: v_readlane_b32 s39, v0, 3 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) @@ -214,10 +230,15 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s34 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[44:45], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[44:45] +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] @@ -225,7 +246,7 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O3-LABEL: strict_wwm_cfg: ; GFX9-O3: ; %bb.0: ; %entry ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] @@ -267,7 +288,7 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v2, off, 
s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] @@ -331,13 +352,13 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O0-LABEL: strict_wwm_call: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 2 +; GFX9-O0-NEXT: s_mov_b32 s35, s33 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[36:37], -1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400 ; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1 @@ -369,26 +390,26 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[36:37], -1 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b32 s33, s35 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_call: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: v_writelane_b32 v3, s33, 2 +; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 ; GFX9-O3-NEXT: v_writelane_b32 v3, s31, 1 @@ -409,13 +430,13 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O3-NEXT: v_readlane_b32 s30, 
v3, 0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 -; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-O3-NEXT: v_readlane_b32 s33, v3, 2 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-O3-NEXT: s_mov_b32 s33, s38 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -515,25 +536,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-LABEL: strict_wwm_call_i64: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b32 s44, s33 +; GFX9-O0-NEXT: s_mov_b32 s33, s32 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 
offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_writelane_b32 v10, s33, 8 -; GFX9-O0-NEXT: s_mov_b32 s33, s32 -; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00 +; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000 +; GFX9-O0-NEXT: ; implicit-def: $vgpr0 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1 ; GFX9-O0-NEXT: s_mov_b32 s34, s8 @@ -542,10 +566,10 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: s_mov_b32 s37, s5 ; GFX9-O0-NEXT: s_mov_b32 s38, s6 ; GFX9-O0-NEXT: s_mov_b32 s39, s7 -; GFX9-O0-NEXT: v_writelane_b32 v10, s36, 2 -; GFX9-O0-NEXT: v_writelane_b32 v10, s37, 3 -; GFX9-O0-NEXT: v_writelane_b32 v10, s38, 4 -; GFX9-O0-NEXT: v_writelane_b32 v10, s39, 5 +; GFX9-O0-NEXT: v_writelane_b32 v0, s36, 0 +; GFX9-O0-NEXT: v_writelane_b32 v0, s37, 1 +; GFX9-O0-NEXT: v_writelane_b32 v0, s38, 2 +; GFX9-O0-NEXT: v_writelane_b32 v0, s39, 3 ; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35 ; GFX9-O0-NEXT: s_mov_b32 s35, s9 ; GFX9-O0-NEXT: ; kill: def $sgpr40_sgpr41 killed $sgpr34_sgpr35 @@ -557,8 +581,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v9, s37 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: v_writelane_b32 v10, s34, 6 -; GFX9-O0-NEXT: v_writelane_b32 v10, s35, 7 +; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[42:43], -1 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_mov_b64 exec, s[42:43] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-O0-NEXT: s_mov_b32 s34, 32 ; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 @@ -575,12 +602,16 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-O0-NEXT: v_readlane_b32 s34, v10, 6 -; GFX9-O0-NEXT: v_readlane_b32 s35, v10, 7 -; GFX9-O0-NEXT: v_readlane_b32 s36, v10, 2 -; GFX9-O0-NEXT: v_readlane_b32 s37, v10, 3 -; GFX9-O0-NEXT: v_readlane_b32 s38, v10, 4 -; GFX9-O0-NEXT: v_readlane_b32 s39, v10, 5 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[42:43], -1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[42:43] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s34, v6, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v6, 5 +; GFX9-O0-NEXT: v_readlane_b32 s36, v6, 0 +; GFX9-O0-NEXT: v_readlane_b32 s37, v6, 1 +; GFX9-O0-NEXT: v_readlane_b32 s38, v6, 2 +; GFX9-O0-NEXT: v_readlane_b32 s39, v6, 3 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr40 @@ -595,51 +626,57 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[42:43], -1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[42:43] ; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 ; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 -; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400 
-; GFX9-O0-NEXT: v_readlane_b32 s33, v10, 8 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000 +; GFX9-O0-NEXT: s_mov_b32 s33, s44 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_call_i64: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O3-NEXT: s_mov_b32 s40, s33 +; GFX9-O3-NEXT: s_mov_b32 s33, s32 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; 
GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: v_writelane_b32 v8, s33, 2 -; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 ; GFX9-O3-NEXT: v_writelane_b32 v8, s31, 1 @@ -670,22 +707,22 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 -; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-O3-NEXT: v_readlane_b32 s33, v8, 2 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 +; GFX9-O3-NEXT: s_mov_b32 s33, s40 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) @@ -701,7 +738,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-LABEL: strict_wwm_amdgpu_cs_main: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -778,7 
+815,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 ; GFX9-O0-NEXT: buffer_store_dwordx4 v[5:8], v0, s[36:39], s34 offen ; GFX9-O0-NEXT: buffer_store_dwordx2 v[3:4], v0, s[36:39], s34 offen offset:16 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -789,7 +826,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-LABEL: strict_wwm_amdgpu_cs_main: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -833,7 +870,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-NEXT: v_mov_b32_e32 v12, v6 ; GFX9-O3-NEXT: buffer_store_dwordx4 v[7:10], v0, s[4:7], 0 offen ; GFX9-O3-NEXT: buffer_store_dwordx2 v[11:12], v0, s[4:7], 0 offen offset:16 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll index 53c32f383ac7f..036d1d1417808 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll @@ -4,8 +4,8 @@ ; Test that custom pseudo source values can be round trip serialized through MIR. 
; CHECK-LABEL: {{^}}name: shader -; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4, align 1, addrspace 4) -; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "ImageResource") +; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from unknown-address + 4, align 1, addrspace 7) +; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 7) ; CHECK: DS_GWS_BARRIER %[[#]], 63, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource") define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, <4 x i32> inreg %arg3) { %bload0 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %arg3, i32 4, i32 0, i32 0) diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll index 2f4a431401de1..58399c3cfe031 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -37,6 +37,7 @@ ; AFTER-PEI-NEXT: occupancy: 5 ; AFTER-PEI-NEXT: scavengeFI: '%fixed-stack.0' ; AFTER-PEI-NEXT: vgprForAGPRCopy: '' +; AFTER-PEI-NEXT: sgprForEXECCopy: '' ; AFTER-PEI-NEXT: body: define amdgpu_kernel void @scavenge_fi(i32 addrspace(1)* %out, i32 %in) #0 { %wide.sgpr0 = call <32 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 172744e060cbf..c82f5f23a893c 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -46,6 +46,7 @@ # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -144,6 +145,7 @@ body: | # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -213,6 +215,7 @@ body: | # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -283,6 +286,7 @@ body: | # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 # FULL-NEXT: vgprForAGPRCopy: '' +# FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: body: # SIMPLE: machineFunctionInfo: @@ -529,3 +533,28 @@ body: | SI_RETURN ... + +--- +# ALL-LABEL: name: sgpr_for_exec_copy +# ALL: sgprForEXECCopy: '$sgpr2_sgpr3' +name: sgpr_for_exec_copy +machineFunctionInfo: + sgprForEXECCopy: '$sgpr2_sgpr3' +body: | + bb.0: + SI_RETURN + +... + +--- +# ALL-LABEL: name: sgpr_for_exec_copy_noreg +# FULL: sgprForEXECCopy: '' +# SIMPLE-NOT: sgprForEXECCopy +name: sgpr_for_exec_copy_noreg +machineFunctionInfo: + sgprForEXECCopy: '$noreg' +body: | + bb.0: + SI_RETURN + +... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index 867d73f18f0ca..c84ecf5989a93 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -40,6 +40,7 @@ ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { %gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0 @@ -80,6 +81,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { %gep = getelementptr inbounds [128 x i32], [128 x i32] addrspace(2)* @gds, i32 0, i32 %arg0 @@ -134,6 +136,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: body: define void @function() { ret void @@ -180,6 +183,7 @@ define void @function() { ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 ; CHECK-NEXT: vgprForAGPRCopy: '' +; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: body: define void @function_nsz() #0 { ret void diff --git a/llvm/test/CodeGen/MIR/AMDGPU/sgpr-for-exec-copy-invalid-reg.mir b/llvm/test/CodeGen/MIR/AMDGPU/sgpr-for-exec-copy-invalid-reg.mir new file mode 100644 index 0000000000000..172c388e7cb11 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/sgpr-for-exec-copy-invalid-reg.mir @@ -0,0 +1,12 @@ +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s + +--- +name: invalid_reg +machineFunctionInfo: +# ERR: [[@LINE+1]]:21: unknown register name 'srst' + sgprForEXECCopy: '$srst' +body: | + bb.0: + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir index 156891fef3625..240c60e72db21 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir @@ -3,7 +3,7 @@ # contains not dead objects only. So using objects IDs as offset in the storage # caused out of bounds access. 
-# RUN: llc -march=amdgcn -run-pass=si-lower-sgpr-spills,prologepilog -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s # CHECK-LABEL: name: foo # CHECK: {{^}}fixedStack: [] diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf.ll b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf.ll index 93cfb9a02401b..a07d7feeeca07 100644 --- a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf.ll +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf.ll @@ -1,4 +1,5 @@ -; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -v -debug-info - | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -v -debug-info - | FileCheck --check-prefixes=COMMON,FLAT-SCR-DIS %s +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -v -debug-info - | FileCheck --check-prefixes=COMMON,FLAT-SCR-ENA %s source_filename = "heterogeneous-dwarf.cl" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" @@ -12,13 +13,20 @@ target triple = "amdgcn-amd-amdhsa" ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_regx SGPR33_LO16, DW_OP_lit6, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_swap, DW_OP_deref_size 0x4, DW_OP_swap, DW_OP_shr, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_constu 0x5, DW_OP_LLVM_form_aspace_address, DW_OP_lit16, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_LLVM_offset) ; CHECK-NEXT: DW_AT_name {{.*}}"B" +; COMMON: {{.*}}DW_TAG_variable +; FLAT-SCR-DIS: DW_AT_location [DW_FORM_exprloc] (DW_OP_regx SGPR33_LO16, DW_OP_lit6, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_swap, DW_OP_deref_size 0x4, DW_OP_swap, DW_OP_shr, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_constu 0x5, DW_OP_LLVM_form_aspace_address, DW_OP_lit20, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_LLVM_offset) +; FLAT-SCR-ENA: DW_AT_location [DW_FORM_exprloc] (DW_OP_regx SGPR33_LO16, DW_OP_deref_size 0x4, DW_OP_constu 0x5, DW_OP_LLVM_form_aspace_address, DW_OP_lit16, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_LLVM_offset) +; COMMON: DW_AT_name {{.*}}"C" + define protected amdgpu_kernel void @testKernel(i32 addrspace(1)* %A) #0 !dbg !11 !kernel_arg_addr_space !17 !kernel_arg_access_qual !18 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !20 { entry: %A.addr = alloca i32 addrspace(1)*, align 8, addrspace(5) %B = alloca i32, align 4, addrspace(5) + %C = alloca i32, align 4, addrspace(5) store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 8 call void @llvm.dbg.def(metadata !21, metadata i32 addrspace(1)* addrspace(5)* %A.addr), !dbg !23 call void @llvm.dbg.def(metadata !24, metadata i32 addrspace(5)* %B), !dbg !26 + call void @llvm.dbg.def(metadata !31, metadata i32 addrspace(5)* %C), !dbg !34 store i32 777, i32 addrspace(5)* %B, align 4, !dbg !26 %0 = load i32, i32 addrspace(5)* %B, align 4, !dbg !27 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 8, !dbg !28 @@ -28,7 +36,7 @@ entry: declare void @llvm.dbg.def(metadata, metadata) #1 -attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } +attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!0} @@ -67,3 +75,7 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } !28 = !DILocation(line: 3, column: 4, scope: !11) !29 = !DILocation(line: 3, column: 6, scope: !11) !30 = !DILocation(line: 4, column: 1, scope: !11) +!31 = distinct !DILifetime(object: !32, location: !DIExpr(DIOpReferrer(i32 addrspace(5)*), DIOpDeref(i32))) +!32 = !DILocalVariable(name: "C", scope: !11, file: !1, line: 1, type: !33) +!33 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!34 = !DILocation(line: 5, column: 1, scope: !11) diff --git a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll b/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll new file mode 100644 index 0000000000000..6933f0bd826f7 --- /dev/null +++ b/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll @@ -0,0 +1,136 @@ +; RUN: llc %s -mtriple=x86_64-linux-gnu -generate-type-units -o - -filetype=obj \ +; RUN: | llvm-dwarfdump -o - - \ +; RUN: | FileCheck %s + +; XFAIL: * + +;; PR51087 +;; Check that types that are not referenecd in the CU and have type units +;; do not get unecessary skeleton DIEs in the CU, and that types that have +;; type units but are referenced in the CU still get CU DIEs. +;; +;; In the test (source below): +;; Unused is not used anywhere and should get only a type unit. +;; +;; Outer is used (by global O) so should get a CU DIE, but none of its nested +;; types (Inner, then nested again Enum1 and Enum2) are used so they should not. +;; +;; Ex is not used directly, but its nested type Enum is, so both should get +;; a DIE in the CU. Retained types and enums are emitted after globals, so +;; having Enum used by a local variable lets us check that type DIEs emitted +;; for types that initially only need type units still get a CU DIE later on +;; if required. +;; +;; Generated with `-Xclang -debug-info-kind=unused-types` (for Unused) from: +;; $ cat test.cpp +;; struct Unused {}; +;; +;; class Outer { +;; public: +;; struct Inner { +;; enum Enum1 { X }; +;; enum Enum2 { Y }; +;; Enum1 one; +;; Enum2 two; +;; }; +;; +;; Inner n; +;; } O; +;; +;; struct Ex { enum Enum { X }; }; +;; void fun() { Ex::Enum local; } + +;; Note: The types without a type_signature match here should only get type +;; units and no CU DIE. 
+; CHECK: 0x00000000: Type Unit{{.+}} name = 'Outer'{{.+}} type_signature = [[SIG_Outer:[0-9a-fx]+]] +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Inner'{{.+}} +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum1'{{.+}} +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum2'{{.+}} +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum'{{.+}} type_signature = [[SIG_Enum:[0-9a-fx]+]] +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Ex'{{.+}} type_signature = [[SIG_Ex:[0-9a-fx]+]] +; CHECK: 0x00000000: Type Unit{{.+}} name = 'Unused'{{.+}} + +;; Check the CU references and skeleton DIEs are emitted correctly. +;; The check-not directives check that Unused doesn't get a DIE in the CU. +; CHECK: DW_TAG_compile_unit +; CHECK-NOT: DW_AT_signature +; CHECK: DW_AT_type ([[DIE_Outer:[0-9a-fx]+]] "Outer") +; CHECK-NOT: DW_AT_signature + +;; Outer is referenced in the CU so it needs a DIE, but its nested enums are not +;; and so should not have DIEs here. +; CHECK: [[DIE_Outer]]: DW_TAG_class_type +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_signature ([[SIG_Outer]]) + +; CHECK-NOT: DW_AT_signature +; CHECK: DW_AT_type ([[DIE_Enum:[0-9a-fx]+]] "Ex::Enum") +; CHECK-NOT: DW_AT_signature + +;; Ex is not referenced in the CU but its nested type, Enum, is. +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_signature ([[SIG_Ex]]) +; CHECK-EMPTY: +; CHECK-NEXT: [[DIE_Enum]]: DW_TAG_enumeration_type +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_signature ([[SIG_Enum]]) + +;; One last check that Unused has no CU DIE. +; CHECK-NOT: DW_AT_signature + +%class.Outer = type { %"struct.Outer::Inner" } +%"struct.Outer::Inner" = type { i32, i32 } + +@O = dso_local global %class.Outer zeroinitializer, align 4, !dbg !0 + +define dso_local void @_Z3funv() !dbg !31 { +entry: + %local = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i32* %local, metadata !34, metadata !DIExpression()), !dbg !35 + ret void, !dbg !36 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!25, !26, !27, !28, !29} +!llvm.ident = !{!30} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "O", scope: !2, file: !3, line: 13, type: !7, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !22, globals: !24, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.cpp", directory: "/") +!4 = !{!5, !13, !19} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum1", scope: !6, file: !3, line: 6, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN5Outer5Inner5Enum1E") +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Inner", scope: !7, file: !3, line: 5, size: 64, flags: DIFlagTypePassByValue, elements: !10, identifier: "_ZTSN5Outer5InnerE") +!7 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "Outer", file: !3, line: 3, size: 64, flags: DIFlagTypePassByValue, elements: !8, identifier: "_ZTS5Outer") +!8 = !{!9} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "n", scope: !7, file: !3, line: 12, baseType: !6, size: 64, flags: DIFlagPublic) +!10 = !{!11, !12} +!11 = !DIDerivedType(tag: DW_TAG_member, name: "one", scope: !6, file: !3, line: 8, baseType: !5, size: 32) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "two", scope: !6, 
file: !3, line: 9, baseType: !13, size: 32, offset: 32) +!13 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum2", scope: !6, file: !3, line: 7, baseType: !14, size: 32, elements: !15, identifier: "_ZTSN5Outer5Inner5Enum2E") +!14 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!15 = !{!16} +!16 = !DIEnumerator(name: "Y", value: 0, isUnsigned: true) +!17 = !{!18} +!18 = !DIEnumerator(name: "X", value: 0, isUnsigned: true) +!19 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", scope: !20, file: !3, line: 15, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN2Ex4EnumE") +!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ex", file: !3, line: 15, size: 8, flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS2Ex") +!21 = !{} +!22 = !{!23, !7, !6, !20} +!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Unused", file: !3, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS6Unused") +!24 = !{!0} +!25 = !{i32 7, !"Dwarf Version", i32 5} +!26 = !{i32 2, !"Debug Info Version", i32 3} +!27 = !{i32 1, !"wchar_size", i32 4} +!28 = !{i32 7, !"uwtable", i32 1} +!29 = !{i32 7, !"frame-pointer", i32 2} +!30 = !{!"clang version 14.0.0"} +!31 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 16, type: !32, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !21) +!32 = !DISubroutineType(types: !33) +!33 = !{null} +!34 = !DILocalVariable(name: "local", scope: !31, file: !3, line: 16, type: !19) +!35 = !DILocation(line: 16, column: 23, scope: !31) +!36 = !DILocation(line: 16, column: 30, scope: !31) diff --git a/llvm/test/LTO/ARM/lto-linking-metadata-already-present.ll b/llvm/test/LTO/ARM/lto-linking-metadata-already-present.ll new file mode 100644 index 0000000000000..cd877f9da23a3 --- /dev/null +++ b/llvm/test/LTO/ARM/lto-linking-metadata-already-present.ll @@ -0,0 +1,24 @@ +; RUN: opt %s -o %t1.bc + +; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-linked-module -save-merged-module -O1 --exported-symbol=foo +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s + +; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \ +; RUN: -r=%t1.bc,foo,pxl +; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck %s + +; Tests that LTO won't add LTOPostLink twice. + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7a-unknown-linux" + +define void @foo() { +entry: + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"LTOPostLink", i32 1} + +; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]} +; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/LTO/ARM/lto-linking-metadata-overwrite.ll b/llvm/test/LTO/ARM/lto-linking-metadata-overwrite.ll new file mode 100644 index 0000000000000..a17bf84474665 --- /dev/null +++ b/llvm/test/LTO/ARM/lto-linking-metadata-overwrite.ll @@ -0,0 +1,26 @@ +; RUN: opt %s -o %t1.bc + +; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-linked-module -save-merged-module -O1 --exported-symbol=foo +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s + +; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \ +; RUN: -r=%t1.bc,foo,pxl +; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck %s + +; Tests that LTO won't add LTOPostLink twice and will overwrite +; the existing flag with the correct value. 
+ +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7a-unknown-linux" + +define void @foo() { +entry: + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"LTOPostLink", i32 0} + +; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]} +; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1} + diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index 9e28a0559518f..afd763796eae4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -1,2816 +1,410 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_mov_b32 v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0xe4,0x00,0x00] -v_bfrev_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_mov_b32 v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x40,0x01,0x00] -v_bfrev_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_mov_b32 v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x41,0x01,0x00] -v_bfrev_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_mov_b32 v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x01,0x01,0x00] -v_bfrev_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_mov_b32 v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x0f,0x01,0x00] -v_bfrev_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_mov_b32 v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x11,0x01,0x00] -v_bfrev_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_mov_b32 v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1f,0x01,0x00] -v_bfrev_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_mov_b32 v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x21,0x01,0x00] -v_bfrev_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_mov_b32 v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x2f,0x01,0x00] -v_bfrev_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_mov_b32 v5, v1 row_share:0 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x50,0x01,0x00] -v_bfrev_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_mov_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x5f,0x01,0x00] 
-v_bfrev_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_mov_b32 v5, v1 row_xmask:0 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x60,0x01,0x00] -v_bfrev_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +v_mov_b32 v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x6f,0x01,0x00] -v_ceil_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x1 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x10] -v_ceil_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x30] -v_ceil_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xf0] -v_ceil_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] bank_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xf0] -v_ceil_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x01] -v_ceil_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x3 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x03] -v_ceil_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0xf +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x0f] -v_ceil_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x0f] -v_ceil_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_cvt_f32_u32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_cvt_u32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_cvt_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_cvt_f16_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f16 v127, -|v127| row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] +v_cvt_f32_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_rpi_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_flr_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_off_f32_i4 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_cvt_f32_ubyte0 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_f32_ubyte1 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_f32_ubyte2 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_cvt_f32_ubyte3 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_fract_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_trunc_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_ceil_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_rndne_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_floor_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_exp_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_ceil_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x44,0xfe,0x7f,0xff,0x6f,0x3d,0x30] +v_log_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: 
[0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_rcp_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_rcp_iflag_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_rsq_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_sqrt_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_sin_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cos_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_not_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_ffbh_u32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_ffbl_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_ffbh_i32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_frexp_exp_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_frexp_mant_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cls_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +v_cvt_f16_u16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_f16_i16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: 
[0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_u16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_rcp_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_sqrt_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_rsq_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_log_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_exp_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_frexp_mant_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_frexp_exp_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_floor_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_ceil_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_trunc_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_clz_i32_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +v_rndne_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_fract_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_sin_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// 
GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cos_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_norm_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_norm_u16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:0 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] -v_cos_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_mov_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_cvt_f32_u32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_cvt_u32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_cvt_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_cvt_f16_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xc2,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] +v_cvt_f32_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_rpi_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_flr_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_off_f32_i4 v5, v1 
quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_cvt_f32_ubyte0 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_f32_ubyte1 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_f32_ubyte2 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_cvt_f32_ubyte3 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_fract_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_trunc_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_ceil_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_rndne_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_floor_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_exp_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cos_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x6c,0xfe,0x7f,0xff,0x6f,0x3d,0x30] +v_log_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_rcp_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_rcp_iflag_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_rsq_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_half_mirror -// GFX11: 
encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_sqrt_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_sin_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cos_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_not_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_ffbh_u32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_ffbl_b32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_ffbh_i32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_frexp_exp_i32_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_frexp_mant_f32 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_ctz_i32_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +v_cvt_f16_u16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_cvt_f16_i16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_cvt_u16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cvt_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_rcp_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: 
encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_sqrt_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_rsq_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_log_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_exp_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_frexp_mant_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_frexp_exp_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_floor_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_ceil_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_trunc_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_f32 v127, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x14,0xfe,0x7e,0xff,0x6f,0x3d,0x30] +v_rndne_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_fract_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_sin_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_cos_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_shl:1 -// GFX11: encoding: 
[0xfa,0xa2,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_cvt_norm_i16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_cvt_norm_u16_f16 v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_movreld_b32 v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00] -v_cvt_f16_i16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_movrels_b32 v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX11: encoding: [0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00] -v_cvt_f16_i16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_movrelsd_2_b32 v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00] -v_cvt_f16_i16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f16_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f16_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f16_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f16_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xa2,0xfe,0x7e,0x7f,0x6f,0x0d,0x30] - -v_cvt_f16_u16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f16_u16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f16_u16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f16_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f16_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f16_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xa0,0xfe,0x7e,0x7f,0x6f,0x0d,0x30] - -v_cvt_f32_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_f16 v5, v1 row_mirror -// 
GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_f16 v255, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x16,0xfe,0x7f,0x7f,0x6f,0x3d,0x30] - -v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_i32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_i32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x0a,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_u32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_u32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_u32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x11,0x01,0xff] - 
-v_cvt_f32_u32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x0c,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte0 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte0 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte0 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte0 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte0 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x22,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte1 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte1 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: 
[0xfa,0x24,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte1 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte1 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte1 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x24,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte2 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte2 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte2 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte2 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte2 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x26,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte3 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte3 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte3 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte3 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte3 v255, v255 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x28,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_floor_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_floor_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_floor_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_floor_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_floor_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x1a,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_flr_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_flr_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_flr_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_flr_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_flr_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x1a,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_i16_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_i16_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0xe4,0x00,0xff] - 
-v_cvt_i16_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xa6,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_cvt_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x10,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_i32_i16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_i32_i16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_i32_i16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_shr:1 -// GFX11: encoding: 
[0xfa,0xd4,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_i32_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_i32_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xd4,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_i32_i16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xd4,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] - -v_cvt_nearest_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_nearest_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_nearest_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_nearest_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_nearest_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x18,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_norm_i16_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_norm_i16_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_ror:15 -// GFX11: encoding: 
[0xfa,0xc6,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_norm_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_norm_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_norm_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xc6,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_cvt_norm_u16_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_norm_u16_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_norm_u16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_norm_u16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_norm_u16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xc8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_cvt_off_f32_i4 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_off_f32_i4 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_off_f32_i4 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_off_f32_i4 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_off_f32_i4 v5, v1 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_off_f32_i4 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x1c,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_cvt_rpi_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_rpi_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_rpi_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_rpi_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_rpi_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x18,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_u16_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_u16_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_u16_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_u16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_u16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_u16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xa4,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_cvt_u32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_u32_f32 v5, 
v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_u32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_u32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_u32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_u32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_cvt_u32_u16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_cvt_u32_u16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_cvt_u32_u16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_cvt_u32_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_cvt_u32_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_cvt_u32_u16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x0d,0x30] - -v_exp_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_exp_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_exp_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_exp_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_exp_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_exp_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff] - 
-v_exp_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_exp_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_exp_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_exp_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_exp_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_exp_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_exp_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_exp_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb0,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_exp_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_exp_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_exp_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_exp_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_exp_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_exp_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_exp_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_exp_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_exp_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_exp_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_exp_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_exp_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_exp_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_exp_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x4a,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_ffbh_i32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_ffbh_i32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_ffbh_i32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_ffbh_i32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_ffbh_i32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_ffbh_i32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_ffbh_i32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_ffbh_i32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_ffbh_i32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_ffbh_i32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_ffbh_i32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_ffbh_i32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 
-// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_ffbh_i32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_ffbh_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x76,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_ffbh_u32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_ffbh_u32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_ffbh_u32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_ffbh_u32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_ffbh_u32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_ffbh_u32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_ffbh_u32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_ffbh_u32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_ffbh_u32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_ffbh_u32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_ffbh_u32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_ffbh_u32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_ffbh_u32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_ffbh_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x72,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_ffbl_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_ffbl_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_ffbl_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_ffbl_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_ffbl_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_ffbl_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_ffbl_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_ffbl_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_ffbl_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_ffbl_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_ffbl_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_ffbl_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_ffbl_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_ffbl_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_floor_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_floor_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: 
[0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_floor_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_floor_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_floor_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_floor_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_floor_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_floor_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_floor_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_floor_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_floor_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_floor_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_floor_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_floor_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_floor_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_floor_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_floor_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_floor_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_floor_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_floor_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_floor_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_floor_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_floor_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_floor_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_floor_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_floor_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_floor_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_floor_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x48,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_fract_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_fract_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_fract_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_fract_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_fract_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_fract_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_fract_f16 v5, v1 row_shr:1 -// GFX11: encoding: 
[0xfa,0xbe,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_fract_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_fract_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_fract_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_fract_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_fract_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_fract_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_fract_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xbe,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_fract_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_fract_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_fract_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_fract_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_fract_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_fract_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_fract_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_fract_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_fract_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_fract_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_fract_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_fract_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_fract_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_fract_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x40,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_frexp_exp_i16_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i16_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: 
[0xfa,0xb4,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_frexp_exp_i16_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i16_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_frexp_exp_i16_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb4,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_frexp_exp_i32_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i32_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_frexp_exp_i32_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_frexp_exp_i32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x7e,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_frexp_mant_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_frexp_mant_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x60,0x09,0x13] - 
-v_frexp_mant_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xb2,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_frexp_mant_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_frexp_mant_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_frexp_mant_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x80,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_log_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_log_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_log_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_log_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_log_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_log_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_log_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_log_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_log_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_log_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_log_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_log_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_log_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_log_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xae,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_log_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_log_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_log_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_log_f32 v5, 
v1 row_half_mirror -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_log_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_log_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_log_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_log_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_log_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_log_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_log_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_log_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_log_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_log_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x4e,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_mov_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_mov_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_mov_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_mov_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_mov_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_mov_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_mov_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_mov_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_mov_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_mov_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_mov_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_mov_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_mov_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_mov_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_movreld_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_movreld_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_movreld_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_movreld_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_movreld_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_movreld_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_movreld_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_movreld_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_movreld_b32 v5, v1 row_ror:1 -// GFX11: encoding: 
[0xfa,0x84,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_movreld_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_movreld_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_movreld_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_movreld_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_movreld_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_movrels_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_movrels_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_movrels_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_movrels_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_movrels_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_movrels_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_movrels_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_movrels_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_movrels_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_movrels_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_movrels_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_movrels_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_movrels_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_movrels_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_movrelsd_2_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_movrelsd_2_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_movrelsd_2_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_movrelsd_2_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_movrelsd_2_b32 v5, v1 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_movrelsd_2_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_movrelsd_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_movrelsd_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_movrelsd_b32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_movrelsd_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_movrelsd_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_movrelsd_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_not_b16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_not_b16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_not_b16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_not_b16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_not_b16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_not_b16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_not_b16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_not_b16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_not_b16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_not_b16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_not_b16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_not_b16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_not_b16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_not_b16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xd2,0xfe,0x7e,0x7f,0x6f,0x0d,0x30] - -v_not_b32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_not_b32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_not_b32 v5, v1 
row_mirror -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_not_b32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_not_b32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_not_b32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_not_b32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_not_b32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_not_b32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_not_b32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_not_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_not_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_not_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_not_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x6e,0xfe,0x7f,0xff,0x6f,0x0d,0x30] - -v_rcp_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rcp_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rcp_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rcp_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rcp_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rcp_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rcp_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rcp_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rcp_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rcp_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rcp_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rcp_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rcp_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rcp_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xa8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_rcp_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rcp_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rcp_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rcp_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rcp_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rcp_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rcp_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rcp_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rcp_f32 v5, v1 
row_ror:1 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rcp_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rcp_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rcp_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rcp_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rcp_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x54,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_rcp_iflag_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rcp_iflag_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rcp_iflag_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rcp_iflag_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rcp_iflag_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rcp_iflag_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x56,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_rndne_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rndne_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rndne_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rndne_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rndne_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rndne_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rndne_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rndne_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rndne_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rndne_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rndne_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rndne_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rndne_f16 v5, v1 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rndne_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xbc,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_rndne_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rndne_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rndne_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rndne_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rndne_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rndne_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rndne_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rndne_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rndne_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rndne_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rndne_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rndne_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rndne_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rndne_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x46,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_rsq_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rsq_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rsq_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rsq_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rsq_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rsq_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rsq_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rsq_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rsq_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rsq_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rsq_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rsq_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rsq_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rsq_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xac,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_rsq_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_rsq_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_rsq_f32 v5, v1 row_mirror -// GFX11: encoding: 
[0xfa,0x5c,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_rsq_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_rsq_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_rsq_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_rsq_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_rsq_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_rsq_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_rsq_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_rsq_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_rsq_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_rsq_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_rsq_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_sat_pk_u8_i16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_sat_pk_u8_i16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_sat_pk_u8_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_sat_pk_u8_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_sat_pk_u8_i16 v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] - -v_sin_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_sin_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_sin_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_sin_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_sin_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_sin_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_sin_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_sin_f16 v5, v1 row_shr:15 -// GFX11: encoding: 
[0xfa,0xc0,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_sin_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_sin_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_sin_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_sin_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_sin_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_sin_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xc0,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_sin_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_sin_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_sin_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_sin_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_sin_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_sin_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_sin_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_sin_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_sin_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_sin_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_sin_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_sin_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_sin_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_sin_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x6a,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_sqrt_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_sqrt_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_sqrt_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_sqrt_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_sqrt_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_sqrt_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_sqrt_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_sqrt_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_sqrt_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_sqrt_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_sqrt_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_sqrt_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_sqrt_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// 
GFX11: encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_sqrt_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xaa,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_sqrt_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_sqrt_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_sqrt_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_sqrt_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_sqrt_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_sqrt_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_sqrt_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_sqrt_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_sqrt_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_sqrt_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_sqrt_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_sqrt_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_sqrt_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_sqrt_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x66,0xfe,0x7f,0xff,0x6f,0x3d,0x30] - -v_trunc_f16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_trunc_f16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_trunc_f16 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x40,0x01,0xff] - -v_trunc_f16 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_trunc_f16 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_trunc_f16 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_trunc_f16 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_trunc_f16 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_trunc_f16 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_trunc_f16 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_trunc_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_trunc_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_trunc_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_trunc_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0xba,0xfe,0x7e,0x7f,0x6f,0x3d,0x30] - -v_trunc_f32 v5, v1 quad_perm:[3,2,1,0] -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0xff] - -v_trunc_f32 v5, v1 quad_perm:[0,1,2,3] -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0xe4,0x00,0xff] - -v_trunc_f32 v5, v1 row_mirror -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x40,0x01,0xff] - 
-v_trunc_f32 v5, v1 row_half_mirror -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x41,0x01,0xff] - -v_trunc_f32 v5, v1 row_shl:1 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x01,0x01,0xff] - -v_trunc_f32 v5, v1 row_shl:15 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x0f,0x01,0xff] - -v_trunc_f32 v5, v1 row_shr:1 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x11,0x01,0xff] - -v_trunc_f32 v5, v1 row_shr:15 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1f,0x01,0xff] - -v_trunc_f32 v5, v1 row_ror:1 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x21,0x01,0xff] - -v_trunc_f32 v5, v1 row_ror:15 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x2f,0x01,0xff] - -v_trunc_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x50,0x01,0xff] - -v_trunc_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x5f,0x01,0x01] - -v_trunc_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x60,0x09,0x13] - -v_trunc_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: encoding: [0xfa,0x42,0xfe,0x7f,0xff,0x6f,0x3d,0x30] +v_movrelsd_b32 v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX11: encoding: [0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index 2b14ca5dd52be..29ecac6329021 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -1,605 +1,329 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_mov_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_i32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_bfrev_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x70,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_f32_u32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xb8,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_cvt_f16_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_rpi_i32_f32 v5, v1 
dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ceil_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x44,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_flr_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cls_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_off_f32_i4 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cls_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_ubyte0 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cls_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x76,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_f32_ubyte1 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_clz_i32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_ubyte2 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_clz_i32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_ubyte3 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_clz_i32_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x72,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_fract_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xc2,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_trunc_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xc2,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ceil_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xc2,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_rndne_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x6c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_floor_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x6c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_exp_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cos_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x6c,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_log_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ctz_i32_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rcp_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ctz_i32_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rcp_iflag_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ctz_i32_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_rsq_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: 
[0xe9,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sqrt_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sin_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_f32 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x14,0xfe,0x7e,0xff,0x00,0x00,0x00] +v_cos_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_not_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_bfrev_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xa2,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_ffbh_u32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xa0,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ffbl_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xa0,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ffbh_i32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f16_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xa0,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_frexp_exp_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_frexp_mant_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f16_u16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_f16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x16,0xfe,0x7f,0x7f,0x00,0x00,0x00] +v_cvt_f16_i16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x0a,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_rcp_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x0c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sqrt_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 
-v_cvt_f32_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x0c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rsq_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x0c,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_log_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte0 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x22,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_exp_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte0 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x22,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_frexp_mant_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte0 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x22,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_frexp_exp_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte1 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x24,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_floor_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte1 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x24,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ceil_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte1 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x24,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_trunc_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte2 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x26,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rndne_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte2 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x26,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_fract_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte2 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x26,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_sin_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte3 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x28,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cos_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte3 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x28,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_norm_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_f32_ubyte3 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x28,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_norm_u16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] +// GFX11: encoding: [0xe9,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_floor_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_mov_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:0 +// GFX11: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_floor_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_mov_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: 
[0xea,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_floor_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x1a,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_f32_i32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_flr_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_u32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_flr_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_flr_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x1a,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xa6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f16_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xa6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xa6,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_cvt_rpi_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_flr_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x10,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_off_f32_i4 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x10,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_f32_ubyte0 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xd4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_ubyte1 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xd4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f32_ubyte2 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_i32_i16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xd4,0xfe,0x7f,0x7f,0x00,0x00,0x00] +v_cvt_f32_ubyte3 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_nearest_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_fract_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_nearest_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_trunc_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_nearest_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
encoding: [0xe9,0x18,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_ceil_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xc6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rndne_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xc6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_floor_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xc6,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_exp_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xc8,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_log_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xc8,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rcp_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_norm_u16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xc8,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_rcp_iflag_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_off_f32_i4 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x1c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rsq_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_off_f32_i4 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x1c,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sqrt_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_off_f32_i4 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x1c,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_sin_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_rpi_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cos_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_rpi_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x18,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_not_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_rpi_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x18,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_bfrev_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xa4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ffbh_u32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xa4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_ffbl_b32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xa4,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_ffbh_i32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa] 
-v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x0e,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_frexp_exp_i32_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x0e,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_frexp_mant_f32 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_f16_u16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_f16_i16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cvt_u16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_cvt_u32_u16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00] +v_cvt_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rcp_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sqrt_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xb0,0xfe,0x7e,0x7f,0x00,0x00,0x00] +v_rsq_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x4a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_log_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x4a,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_exp_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_exp_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x4a,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_frexp_mant_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_frexp_exp_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x76,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_floor_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x76,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_ceil_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_trunc_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: 
[0xea,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_u32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x72,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_rndne_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbh_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x72,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_fract_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sin_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x74,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_cos_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_ffbl_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_norm_i16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] -v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_floor_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_floor_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_floor_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x48,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_fract_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xbe,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_fract_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xbe,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_fract_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xbe,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_fract_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x40,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_fract_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x40,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_fract_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x40,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_frexp_exp_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xb4,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb4,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xb4,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_frexp_exp_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x7e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_exp_i32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x7e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_exp_i32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x7e,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_frexp_mant_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xb2,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xb2,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: 
[0xe9,0xb2,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_frexp_mant_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x80,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x80,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x80,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_log_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xae,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_log_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xae,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_log_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xae,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_log_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x4e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_log_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x4e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_log_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x4e,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_mov_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_mov_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x02,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_mov_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_movreld_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movreld_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movreld_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_movrels_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrels_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrels_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_movrelsd_2_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_movrelsd_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrelsd_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_movrelsd_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_not_b16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_not_b16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_not_b16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xd2,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_not_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x6e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_not_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x6e,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_not_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x6e,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_rcp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xa8,0x0a,0x7e,0x01,0x77,0x39,0x05] - 
-v_rcp_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xa8,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rcp_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xa8,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_rcp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x54,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rcp_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x54,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rcp_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x54,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_rcp_iflag_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x56,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x56,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x56,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rndne_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xbc,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_rndne_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x46,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rndne_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x46,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rndne_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x46,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_rsq_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xac,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rsq_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xac,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rsq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xac,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x5c,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x5c,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_rsq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] - -v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sin_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xc0,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_sin_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x6a,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sin_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x6a,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sin_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x6a,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_sqrt_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xaa,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sqrt_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xaa,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sqrt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: 
[0xe9,0xaa,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_sqrt_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x66,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sqrt_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x66,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_sqrt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x66,0xfe,0x7f,0xff,0x00,0x00,0x00] - -v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_trunc_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0xba,0xfe,0x7e,0x7f,0x00,0x00,0x00] - -v_trunc_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: encoding: [0xe9,0x42,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_trunc_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: encoding: [0xea,0x42,0x0a,0x7e,0x01,0x77,0x39,0x05] - -v_trunc_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: encoding: [0xe9,0x42,0xfe,0x7f,0xff,0x00,0x00,0x00] +v_cvt_norm_u16_f16 v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX11: encoding: [0xea,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s index 5e2f3bec9a24f..146f28a4c1daa 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s @@ -1945,9 +1945,6 @@ v_cmpx_lg_f16 vcc_lo, v2 v_cmpx_lg_f16 vcc_hi, v2 // GFX11: encoding: [0x6b,0x04,0x0a,0x7d] -v_cmpx_lg_f16 ttmp15, v2 -// GFX11: encoding: [0x7b,0x04,0x0a,0x7d] - v_cmpx_lg_f16 m0, v2 // GFX11: encoding: [0x7d,0x04,0x0a,0x7d] @@ -2944,120 +2941,6 @@ v_cmpx_ngt_f16 v127, v2 v_cmpx_ngt_f16 s1, v2 // GFX11: encoding: [0x01,0x04,0x16,0x7d] -v_cmpx_ngt_f16 s105, v2 -// GFX11: encoding: [0x69,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 vcc_lo, v2 -// GFX11: encoding: [0x6a,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 vcc_hi, v2 -// GFX11: encoding: [0x6b,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 ttmp15, v2 -// GFX11: encoding: [0x7b,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 m0, v2 -// GFX11: encoding: [0x7d,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 exec_lo, v2 -// GFX11: encoding: [0x7e,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 exec_hi, v2 -// GFX11: encoding: [0x7f,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 null, v2 -// GFX11: encoding: [0x7c,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 -1, v2 -// GFX11: encoding: [0xc1,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 0.5, v2 -// GFX11: encoding: [0xf0,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 src_scc, v2 -// GFX11: encoding: [0xfd,0x04,0x16,0x7d] - -v_cmpx_ngt_f16 0xfe0b, v127 -// GFX11: encoding: [0xff,0xfe,0x16,0x7d,0x0b,0xfe,0x00,0x00] - -v_cmpx_ngt_f32 v1, v2 -// GFX11: encoding: [0x01,0x05,0x36,0x7d] - -v_cmpx_ngt_f32 v255, v2 -// GFX11: encoding: [0xff,0x05,0x36,0x7d] - -v_cmpx_ngt_f32 s1, v2 -// GFX11: encoding: [0x01,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 s105, v2 -// GFX11: encoding: [0x69,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 vcc_lo, v2 -// GFX11: encoding: [0x6a,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 vcc_hi, v2 -// GFX11: encoding: [0x6b,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 ttmp15, v2 -// GFX11: encoding: [0x7b,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 m0, v2 -// GFX11: encoding: [0x7d,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 exec_lo, v2 -// GFX11: encoding: [0x7e,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 exec_hi, v2 -// GFX11: encoding: [0x7f,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 null, v2 -// GFX11: encoding: [0x7c,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 -1, v2 -// GFX11: encoding: [0xc1,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 0.5, v2 -// GFX11: encoding: [0xf0,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 
src_scc, v2 -// GFX11: encoding: [0xfd,0x04,0x36,0x7d] - -v_cmpx_ngt_f32 0xaf123456, v255 -// GFX11: encoding: [0xff,0xfe,0x37,0x7d,0x56,0x34,0x12,0xaf] - -v_cmpx_ngt_f64 v[1:2], v[2:3] -// GFX11: encoding: [0x01,0x05,0x56,0x7d] - -v_cmpx_ngt_f64 v[254:255], v[2:3] -// GFX11: encoding: [0xfe,0x05,0x56,0x7d] - -v_cmpx_ngt_f64 s[2:3], v[2:3] -// GFX11: encoding: [0x02,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 s[104:105], v[2:3] -// GFX11: encoding: [0x68,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 vcc, v[2:3] -// GFX11: encoding: [0x6a,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 ttmp[14:15], v[2:3] -// GFX11: encoding: [0x7a,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 exec, v[2:3] -// GFX11: encoding: [0x7e,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 null, v[2:3] -// GFX11: encoding: [0x7c,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 -1, v[2:3] -// GFX11: encoding: [0xc1,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 0.5, v[2:3] -// GFX11: encoding: [0xf0,0x04,0x56,0x7d] - -v_cmpx_ngt_f64 src_scc, v[2:3] -// GFX11: encoding: [0xfd,0x04,0x56,0x7d] - v_cmpx_ngt_f64 0xaf123456, v[254:255] // GFX11: encoding: [0xff,0xfc,0x57,0x7d,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index bf77eeffb06e4..df5f2889bae29 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -237,7 +237,7 @@ global_atomic_min_f64 v[0:1], v[2:3], off scc global_atomic_max_f64 v[0:1], v[2:3], off scc // GFX90A: error: scc is not supported on this GPU -buffer_atomic_add_f32 v4, off, s[8:11], s3 scc +flat_load_dword v0, v[0:1] scc // GFX90A: error: scc is not supported on this GPU buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 scc diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt new file mode 100644 index 0000000000000..ee44f75a57944 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -0,0 +1,48206 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# GFX11: s_absdiff_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x83] +0x01,0x02,0x7f,0x83 + +# GFX11: s_absdiff_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x83] +0x01,0x02,0x7e,0x83 + +# GFX11: s_absdiff_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x83] +0x01,0x02,0x7d,0x83 + +# GFX11: s_absdiff_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x83] +0xf0,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x83] +0x80,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x83,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x83,0x73,0x72,0x71,0x3f + +# GFX11: s_absdiff_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x83,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x83,0x56,0x34,0x12,0xaf + +# GFX11: s_absdiff_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x83] +0xc1,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x83] +0xf7,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x83] +0x7f,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x83] +0x7e,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x83] +0x7d,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x83] +0x68,0x67,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s104, s2 ; encoding: 
[0x68,0x02,0x00,0x83] +0x68,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x83] +0x01,0xf0,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x83] +0x01,0x80,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x83,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x83,0x73,0x72,0x71,0x3f + +# GFX11: s_absdiff_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x83,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x83,0x56,0x34,0x12,0xaf + +# GFX11: s_absdiff_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x83] +0x01,0xc1,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x83] +0x01,0xf7,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x83] +0x01,0x7f,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x83] +0x01,0x7e,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x83] +0x01,0x7d,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x83] +0x01,0x67,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x83] +0x01,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x83] +0x01,0x6b,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x83] +0x01,0x6a,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x83] +0x6b,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x83] +0x6a,0x02,0x00,0x83 + +# GFX11: s_absdiff_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x83] +0x68,0x67,0x69,0x83 + +# GFX11: s_absdiff_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x83] +0x68,0x02,0x69,0x83 + +# GFX11: s_absdiff_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x83] +0x01,0x67,0x69,0x83 + +# GFX11: s_absdiff_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x83] +0x01,0x02,0x69,0x83 + +# GFX11: s_absdiff_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x83] +0x01,0x02,0x6b,0x83 + +# GFX11: s_absdiff_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x83] +0x01,0x02,0x6a,0x83 + +# GFX11: s_abs_i32 exec_hi, s1 ; encoding: [0x01,0x15,0xff,0xbe] +0x01,0x15,0xff,0xbe + +# GFX11: s_abs_i32 exec_lo, s1 ; encoding: [0x01,0x15,0xfe,0xbe] +0x01,0x15,0xfe,0xbe + +# GFX11: s_abs_i32 m0, s1 ; encoding: [0x01,0x15,0xfd,0xbe] +0x01,0x15,0xfd,0xbe + +# GFX11: s_abs_i32 s0, 0.5 ; encoding: [0xf0,0x15,0x80,0xbe] +0xf0,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, 0 ; encoding: [0x80,0x15,0x80,0xbe] +0x80,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, 0x3f717273 ; encoding: [0xff,0x15,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x15,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_abs_i32 s0, 0xaf123456 ; encoding: [0xff,0x15,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x15,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_abs_i32 s0, -1 ; encoding: [0xc1,0x15,0x80,0xbe] +0xc1,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, -4.0 ; encoding: [0xf7,0x15,0x80,0xbe] +0xf7,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, exec_hi ; encoding: [0x7f,0x15,0x80,0xbe] +0x7f,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, exec_lo ; encoding: [0x7e,0x15,0x80,0xbe] +0x7e,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, m0 ; encoding: [0x7d,0x15,0x80,0xbe] +0x7d,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, s104 ; encoding: [0x68,0x15,0x80,0xbe] +0x68,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, s1 ; encoding: [0x01,0x15,0x80,0xbe] +0x01,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, vcc_hi ; encoding: [0x6b,0x15,0x80,0xbe] +0x6b,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s0, vcc_lo ; encoding: 
[0x6a,0x15,0x80,0xbe] +0x6a,0x15,0x80,0xbe + +# GFX11: s_abs_i32 s105, s104 ; encoding: [0x68,0x15,0xe9,0xbe] +0x68,0x15,0xe9,0xbe + +# GFX11: s_abs_i32 s105, s1 ; encoding: [0x01,0x15,0xe9,0xbe] +0x01,0x15,0xe9,0xbe + +# GFX11: s_abs_i32 vcc_hi, s1 ; encoding: [0x01,0x15,0xeb,0xbe] +0x01,0x15,0xeb,0xbe + +# GFX11: s_abs_i32 vcc_lo, s1 ; encoding: [0x01,0x15,0xea,0xbe] +0x01,0x15,0xea,0xbe + +# GFX11: s_addc_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x82] +0x01,0x02,0x7f,0x82 + +# GFX11: s_addc_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x82] +0x01,0x02,0x7e,0x82 + +# GFX11: s_addc_u32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x82] +0x01,0x02,0x7d,0x82 + +# GFX11: s_addc_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x82] +0xf0,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x82] +0x80,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x82,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x82,0x73,0x72,0x71,0x3f + +# GFX11: s_addc_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x82,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x82,0x56,0x34,0x12,0xaf + +# GFX11: s_addc_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x82] +0xc1,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x82] +0xf7,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x82] +0x7f,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x82] +0x7e,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x82] +0x7d,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x82] +0x68,0x67,0x00,0x82 + +# GFX11: s_addc_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x82] +0x68,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x82] +0x01,0xf0,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x82] +0x01,0x80,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x82,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x82,0x73,0x72,0x71,0x3f + +# GFX11: s_addc_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x82,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x82,0x56,0x34,0x12,0xaf + +# GFX11: s_addc_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x82] +0x01,0xc1,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x82] +0x01,0xf7,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x82] +0x01,0x7f,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x82] +0x01,0x7e,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x82] +0x01,0x7d,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x82] +0x01,0x67,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x82] +0x01,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x82] +0x01,0x6b,0x00,0x82 + +# GFX11: s_addc_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x82] +0x01,0x6a,0x00,0x82 + +# GFX11: s_addc_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x82] +0x6b,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x82] +0x6a,0x02,0x00,0x82 + +# GFX11: s_addc_u32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x82] +0x68,0x67,0x69,0x82 + +# GFX11: s_addc_u32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x82] +0x68,0x02,0x69,0x82 + +# GFX11: s_addc_u32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x82] +0x01,0x67,0x69,0x82 + +# GFX11: s_addc_u32 s105, s1, s2 ; encoding: 
[0x01,0x02,0x69,0x82] +0x01,0x02,0x69,0x82 + +# GFX11: s_addc_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x82] +0x01,0x02,0x6b,0x82 + +# GFX11: s_addc_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x82] +0x01,0x02,0x6a,0x82 + +# GFX11: s_add_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x81] +0x01,0x02,0x7f,0x81 + +# GFX11: s_add_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x81] +0x01,0x02,0x7e,0x81 + +# GFX11: s_add_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x81] +0x01,0x02,0x7d,0x81 + +# GFX11: s_add_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x81] +0xf0,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x81] +0x80,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x81,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x81,0x73,0x72,0x71,0x3f + +# GFX11: s_add_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x81,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x81,0x56,0x34,0x12,0xaf + +# GFX11: s_add_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x81] +0xc1,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x81] +0xf7,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x81] +0x7f,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x81] +0x7e,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x81] +0x7d,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x81] +0x68,0x67,0x00,0x81 + +# GFX11: s_add_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x81] +0x68,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x81] +0x01,0xf0,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x81] +0x01,0x80,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x81,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x81,0x73,0x72,0x71,0x3f + +# GFX11: s_add_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x81,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x81,0x56,0x34,0x12,0xaf + +# GFX11: s_add_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x81] +0x01,0xc1,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x81] +0x01,0xf7,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x81] +0x01,0x7f,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x81] +0x01,0x7e,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x81] +0x01,0x7d,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x81] +0x01,0x67,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x81] +0x01,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x81] +0x01,0x6b,0x00,0x81 + +# GFX11: s_add_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x81] +0x01,0x6a,0x00,0x81 + +# GFX11: s_add_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x81] +0x6b,0x02,0x00,0x81 + +# GFX11: s_add_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x81] +0x6a,0x02,0x00,0x81 + +# GFX11: s_add_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x81] +0x68,0x67,0x69,0x81 + +# GFX11: s_add_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x81] +0x68,0x02,0x69,0x81 + +# GFX11: s_add_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x81] +0x01,0x67,0x69,0x81 + +# GFX11: s_add_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x81] +0x01,0x02,0x69,0x81 + +# GFX11: s_add_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x81] +0x01,0x02,0x6b,0x81 + +# GFX11: s_add_i32 vcc_lo, s1, s2 ; encoding: 
[0x01,0x02,0x6a,0x81] +0x01,0x02,0x6a,0x81 + +# GFX11: s_addk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb7] +0x34,0x12,0xff,0xb7 + +# GFX11: s_addk_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb7] +0x34,0x12,0xfe,0xb7 + +# GFX11: s_addk_i32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb7] +0x34,0x12,0xfd,0xb7 + +# GFX11: s_addk_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb7] +0x34,0x12,0x80,0xb7 + +# GFX11: s_addk_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb7] +0xd1,0xc1,0x80,0xb7 + +# GFX11: s_addk_i32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb7] +0x34,0x12,0xe9,0xb7 + +# GFX11: s_addk_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb7] +0x34,0x12,0xeb,0xb7 + +# GFX11: s_addk_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb7] +0x34,0x12,0xea,0xb7 + +# GFX11: s_add_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x80] +0x01,0x02,0x7f,0x80 + +# GFX11: s_add_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x80] +0x01,0x02,0x7e,0x80 + +# GFX11: s_add_u32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x80] +0x01,0x02,0x7d,0x80 + +# GFX11: s_add_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x80] +0xf0,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x80] +0x80,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x80,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x80,0x73,0x72,0x71,0x3f + +# GFX11: s_add_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x80,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x80,0x56,0x34,0x12,0xaf + +# GFX11: s_add_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x80] +0xc1,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x80] +0xf7,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x80] +0x7f,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x80] +0x7e,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x80] +0x7d,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x80] +0x68,0x67,0x00,0x80 + +# GFX11: s_add_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x80] +0x68,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x80] +0x01,0xf0,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x80] +0x01,0x80,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x80,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x80,0x73,0x72,0x71,0x3f + +# GFX11: s_add_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x80,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x80,0x56,0x34,0x12,0xaf + +# GFX11: s_add_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x80] +0x01,0xc1,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x80] +0x01,0xf7,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x80] +0x01,0x7f,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x80] +0x01,0x7e,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x80] +0x01,0x7d,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x80] +0x01,0x67,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x80] +0x01,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x80] +0x01,0x6b,0x00,0x80 + +# GFX11: s_add_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x80] +0x01,0x6a,0x00,0x80 + +# GFX11: s_add_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x80] +0x6b,0x02,0x00,0x80 + +# GFX11: s_add_u32 s0, vcc_lo, s2 ; encoding: 
[0x6a,0x02,0x00,0x80] +0x6a,0x02,0x00,0x80 + +# GFX11: s_add_u32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x80] +0x68,0x67,0x69,0x80 + +# GFX11: s_add_u32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x80] +0x68,0x02,0x69,0x80 + +# GFX11: s_add_u32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x80] +0x01,0x67,0x69,0x80 + +# GFX11: s_add_u32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x80] +0x01,0x02,0x69,0x80 + +# GFX11: s_add_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x80] +0x01,0x02,0x6b,0x80 + +# GFX11: s_add_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x80] +0x01,0x02,0x6a,0x80 + +# GFX11: s_and_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8b] +0x01,0x02,0x7f,0x8b + +# GFX11: s_and_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8b] +0x01,0x02,0x7e,0x8b + +# GFX11: s_and_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8b] +0x01,0x02,0x7d,0x8b + +# GFX11: s_and_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8b] +0xf0,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8b] +0x80,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8b,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8b,0x73,0x72,0x71,0x3f + +# GFX11: s_and_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8b,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8b,0x56,0x34,0x12,0xaf + +# GFX11: s_and_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8b] +0xc1,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8b] +0xf7,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8b] +0x7f,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8b] +0x7e,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8b] +0x7d,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8b] +0x68,0x67,0x00,0x8b + +# GFX11: s_and_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8b] +0x68,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8b] +0x01,0xf0,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8b] +0x01,0x80,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x8b,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8b,0x73,0x72,0x71,0x3f + +# GFX11: s_and_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8b,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8b,0x56,0x34,0x12,0xaf + +# GFX11: s_and_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8b] +0x01,0xc1,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8b] +0x01,0xf7,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8b] +0x01,0x7f,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8b] +0x01,0x7e,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8b] +0x01,0x7d,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8b] +0x01,0x67,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8b] +0x01,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8b] +0x01,0x6b,0x00,0x8b + +# GFX11: s_and_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8b] +0x01,0x6a,0x00,0x8b + +# GFX11: s_and_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8b] +0x6b,0x02,0x00,0x8b + +# GFX11: s_and_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8b] +0x6a,0x02,0x00,0x8b + +# GFX11: s_and_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8b] +0x68,0x67,0x69,0x8b + +# GFX11: s_and_b32 s105, s104, s2 ; encoding: 
[0x68,0x02,0x69,0x8b] +0x68,0x02,0x69,0x8b + +# GFX11: s_and_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8b] +0x01,0x67,0x69,0x8b + +# GFX11: s_and_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8b] +0x01,0x02,0x69,0x8b + +# GFX11: s_and_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8b] +0x01,0x02,0x6b,0x8b + +# GFX11: s_and_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8b] +0x01,0x02,0x6a,0x8b + +# GFX11: s_and_b32 s0, s1, null ; encoding: [0x01,0x7c,0x00,0x8b] +0x01,0x7c,0x00,0x8b + +# GFX11: s_and_b32 s0, null, s2 ; encoding: [0x7c,0x02,0x00,0x8b] +0x7c,0x02,0x00,0x8b + +# GFX11: s_and_b32 null, s1, s2 ; encoding: [0x01,0x02,0x7c,0x8b] +0x01,0x02,0x7c,0x8b + +# GFX11: s_and_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x8b] +0x02,0x04,0xfe,0x8b + +# GFX11: s_and_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x8b] +0xf0,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x8b] +0x80,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x8b,0x73,0x72,0x71,0x3f + +# GFX11: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf + +# GFX11: s_and_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8b] +0xc1,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x8b] +0xf7,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x8b] +0x7e,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x8b] +0x66,0x64,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x8b] +0x66,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x8b] +0x02,0xf0,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x8b] +0x02,0x80,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x8b,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x8b,0x73,0x72,0x71,0x3f + +# GFX11: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf + +# GFX11: s_and_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8b] +0x02,0xc1,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x8b] +0x02,0xf7,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8b] +0x02,0x7e,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x8b] +0x02,0x64,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x8b] +0x02,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x8b] +0x02,0x6a,0x80,0x8b + +# GFX11: s_and_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x8b] +0x6a,0x04,0x80,0x8b + +# GFX11: s_and_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x8b] +0x66,0x64,0xe8,0x8b + +# GFX11: s_and_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x8b] +0x66,0x04,0xe8,0x8b + +# GFX11: s_and_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x8b] +0x02,0x64,0xe8,0x8b + +# GFX11: s_and_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x8b] +0x02,0x04,0xe8,0x8b + +# GFX11: s_and_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x8b] +0x02,0x04,0xea,0x8b + +# GFX11: s_and_not0_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x2c,0x80,0xbe] 
+0xf0,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, 0 ; encoding: [0x80,0x2c,0x80,0xbe] +0x80,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x2c,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2c,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not0_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x2c,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2c,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not0_saveexec_b32 s0, -1 ; encoding: [0xc1,0x2c,0x80,0xbe] +0xc1,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x2c,0x80,0xbe] +0xf7,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x2c,0x80,0xbe] +0x7f,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x2c,0x80,0xbe] +0x7e,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, m0 ; encoding: [0x7d,0x2c,0x80,0xbe] +0x7d,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, s104 ; encoding: [0x68,0x2c,0x80,0xbe] +0x68,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, s1 ; encoding: [0x01,0x2c,0x80,0xbe] +0x01,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x2c,0x80,0xbe] +0x6b,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x2c,0x80,0xbe] +0x6a,0x2c,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b32 s105, s104 ; encoding: [0x68,0x2c,0xe9,0xbe] +0x68,0x2c,0xe9,0xbe + +# GFX11: s_and_not0_saveexec_b32 s105, s1 ; encoding: [0x01,0x2c,0xe9,0xbe] +0x01,0x2c,0xe9,0xbe + +# GFX11: s_and_not0_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x2c,0xeb,0xbe] +0x01,0x2c,0xeb,0xbe + +# GFX11: s_and_not0_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x2c,0xea,0xbe] +0x01,0x2c,0xea,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x2d,0x80,0xbe] +0xf0,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x2d,0x80,0xbe] +0x80,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x2d,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2d,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2d,0x80,0xbe] +0xc1,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x2d,0x80,0xbe] +0xf7,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x2d,0x80,0xbe] +0x7e,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x2d,0x80,0xbe] +0x66,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2d,0x80,0xbe] +0x02,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x2d,0x80,0xbe] +0x6a,0x2d,0x80,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x2d,0xe8,0xbe] +0x66,0x2d,0xe8,0xbe + +# GFX11: s_and_not0_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x2d,0xe8,0xbe] +0x02,0x2d,0xe8,0xbe + +# GFX11: s_and_not0_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x2d,0xea,0xbe] +0x02,0x2d,0xea,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, 0.5 ; encoding: [0xf0,0x34,0x80,0xbe] +0xf0,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, 0 ; encoding: [0x80,0x34,0x80,0xbe] +0x80,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x34,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x34,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not0_wrexec_b32 s0, 
0xaf123456 ; encoding: [0xff,0x34,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x34,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not0_wrexec_b32 s0, -1 ; encoding: [0xc1,0x34,0x80,0xbe] +0xc1,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, -4.0 ; encoding: [0xf7,0x34,0x80,0xbe] +0xf7,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, exec_hi ; encoding: [0x7f,0x34,0x80,0xbe] +0x7f,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, exec_lo ; encoding: [0x7e,0x34,0x80,0xbe] +0x7e,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, m0 ; encoding: [0x7d,0x34,0x80,0xbe] +0x7d,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, s104 ; encoding: [0x68,0x34,0x80,0xbe] +0x68,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, s1 ; encoding: [0x01,0x34,0x80,0xbe] +0x01,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, vcc_hi ; encoding: [0x6b,0x34,0x80,0xbe] +0x6b,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s0, vcc_lo ; encoding: [0x6a,0x34,0x80,0xbe] +0x6a,0x34,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b32 s105, s104 ; encoding: [0x68,0x34,0xe9,0xbe] +0x68,0x34,0xe9,0xbe + +# GFX11: s_and_not0_wrexec_b32 s105, s1 ; encoding: [0x01,0x34,0xe9,0xbe] +0x01,0x34,0xe9,0xbe + +# GFX11: s_and_not0_wrexec_b32 vcc_hi, s1 ; encoding: [0x01,0x34,0xeb,0xbe] +0x01,0x34,0xeb,0xbe + +# GFX11: s_and_not0_wrexec_b32 vcc_lo, s1 ; encoding: [0x01,0x34,0xea,0xbe] +0x01,0x34,0xea,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x35,0x80,0xbe] +0xf0,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], 0 ; encoding: [0x80,0x35,0x80,0xbe] +0x80,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x35,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x35,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not0_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x35,0x80,0xbe] +0xc1,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x35,0x80,0xbe] +0xf7,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], exec ; encoding: [0x7e,0x35,0x80,0xbe] +0x7e,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x35,0x80,0xbe] +0x66,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x35,0x80,0xbe] +0x02,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[0:1], vcc ; encoding: [0x6a,0x35,0x80,0xbe] +0x6a,0x35,0x80,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x35,0xe8,0xbe] +0x66,0x35,0xe8,0xbe + +# GFX11: s_and_not0_wrexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x35,0xe8,0xbe] +0x02,0x35,0xe8,0xbe + +# GFX11: s_and_not0_wrexec_b64 vcc, s[2:3] ; encoding: [0x02,0x35,0xea,0xbe] +0x02,0x35,0xea,0xbe + +# GFX11: s_and_not1_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x91] +0x01,0x02,0x7f,0x91 + +# GFX11: s_and_not1_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x91] +0x01,0x02,0x7e,0x91 + +# GFX11: s_and_not1_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x91] +0x01,0x02,0x7d,0x91 + +# GFX11: s_and_not1_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x91] +0xf0,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x91] +0x80,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x91,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x91,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x91,0x56,0x34,0x12,0xaf] 
+0xff,0x02,0x00,0x91,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x91] +0xc1,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x91] +0xf7,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x91] +0x7f,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x91] +0x7e,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x91] +0x7d,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x91] +0x68,0x67,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x91] +0x68,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x91] +0x01,0xf0,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x91] +0x01,0x80,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x91,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x91,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x91,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x91,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x91] +0x01,0xc1,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x91] +0x01,0xf7,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x91] +0x01,0x7f,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x91] +0x01,0x7e,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x91] +0x01,0x7d,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x91] +0x01,0x67,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x91] +0x01,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x91] +0x01,0x6b,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x91] +0x01,0x6a,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x91] +0x6b,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x91] +0x6a,0x02,0x00,0x91 + +# GFX11: s_and_not1_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x91] +0x68,0x67,0x69,0x91 + +# GFX11: s_and_not1_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x91] +0x68,0x02,0x69,0x91 + +# GFX11: s_and_not1_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x91] +0x01,0x67,0x69,0x91 + +# GFX11: s_and_not1_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x91] +0x01,0x02,0x69,0x91 + +# GFX11: s_and_not1_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x91] +0x01,0x02,0x6b,0x91 + +# GFX11: s_and_not1_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x91] +0x01,0x02,0x6a,0x91 + +# GFX11: s_and_not1_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x91] +0x02,0x04,0xfe,0x91 + +# GFX11: s_and_not1_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x91] +0xf0,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x91] +0x80,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x91,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x91] +0xc1,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], 
-4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x91] +0xf7,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x91] +0x7e,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x91] +0x66,0x64,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x91] +0x66,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x91] +0x02,0xf0,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x91] +0x02,0x80,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x91,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x91,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x91] +0x02,0xc1,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x91] +0x02,0xf7,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x91] +0x02,0x7e,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x91] +0x02,0x64,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x91] +0x02,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x91] +0x02,0x6a,0x80,0x91 + +# GFX11: s_and_not1_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x91] +0x6a,0x04,0x80,0x91 + +# GFX11: s_and_not1_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x91] +0x66,0x64,0xe8,0x91 + +# GFX11: s_and_not1_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x91] +0x66,0x04,0xe8,0x91 + +# GFX11: s_and_not1_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x91] +0x02,0x64,0xe8,0x91 + +# GFX11: s_and_not1_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x91] +0x02,0x04,0xe8,0x91 + +# GFX11: s_and_not1_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x91] +0x02,0x04,0xea,0x91 + +# GFX11: s_and_not1_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x30,0x80,0xbe] +0xf0,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, 0 ; encoding: [0x80,0x30,0x80,0xbe] +0x80,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x30,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x30,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x30,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x30,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_saveexec_b32 s0, -1 ; encoding: [0xc1,0x30,0x80,0xbe] +0xc1,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x30,0x80,0xbe] +0xf7,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x30,0x80,0xbe] +0x7f,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x30,0x80,0xbe] +0x7e,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, m0 ; encoding: [0x7d,0x30,0x80,0xbe] +0x7d,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, s104 ; encoding: [0x68,0x30,0x80,0xbe] +0x68,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, s1 ; encoding: [0x01,0x30,0x80,0xbe] +0x01,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x30,0x80,0xbe] +0x6b,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x30,0x80,0xbe] 
+0x6a,0x30,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b32 s105, s104 ; encoding: [0x68,0x30,0xe9,0xbe] +0x68,0x30,0xe9,0xbe + +# GFX11: s_and_not1_saveexec_b32 s105, s1 ; encoding: [0x01,0x30,0xe9,0xbe] +0x01,0x30,0xe9,0xbe + +# GFX11: s_and_not1_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x30,0xeb,0xbe] +0x01,0x30,0xeb,0xbe + +# GFX11: s_and_not1_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x30,0xea,0xbe] +0x01,0x30,0xea,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x31,0x80,0xbe] +0xf0,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x31,0x80,0xbe] +0x80,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x31,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x31,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x31,0x80,0xbe] +0xc1,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x31,0x80,0xbe] +0xf7,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x31,0x80,0xbe] +0x7e,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x31,0x80,0xbe] +0x66,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x31,0x80,0xbe] +0x02,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x31,0x80,0xbe] +0x6a,0x31,0x80,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x31,0xe8,0xbe] +0x66,0x31,0xe8,0xbe + +# GFX11: s_and_not1_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x31,0xe8,0xbe] +0x02,0x31,0xe8,0xbe + +# GFX11: s_and_not1_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x31,0xea,0xbe] +0x02,0x31,0xea,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, 0.5 ; encoding: [0xf0,0x36,0x80,0xbe] +0xf0,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, 0 ; encoding: [0x80,0x36,0x80,0xbe] +0x80,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x36,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x36,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_wrexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x36,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x36,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_wrexec_b32 s0, -1 ; encoding: [0xc1,0x36,0x80,0xbe] +0xc1,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, -4.0 ; encoding: [0xf7,0x36,0x80,0xbe] +0xf7,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, exec_hi ; encoding: [0x7f,0x36,0x80,0xbe] +0x7f,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, exec_lo ; encoding: [0x7e,0x36,0x80,0xbe] +0x7e,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, m0 ; encoding: [0x7d,0x36,0x80,0xbe] +0x7d,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, s104 ; encoding: [0x68,0x36,0x80,0xbe] +0x68,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, s1 ; encoding: [0x01,0x36,0x80,0xbe] +0x01,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, vcc_hi ; encoding: [0x6b,0x36,0x80,0xbe] +0x6b,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s0, vcc_lo ; encoding: [0x6a,0x36,0x80,0xbe] +0x6a,0x36,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b32 s105, s104 ; encoding: [0x68,0x36,0xe9,0xbe] +0x68,0x36,0xe9,0xbe + +# GFX11: s_and_not1_wrexec_b32 s105, s1 ; encoding: [0x01,0x36,0xe9,0xbe] +0x01,0x36,0xe9,0xbe + +# GFX11: s_and_not1_wrexec_b32 vcc_hi, s1 ; encoding: [0x01,0x36,0xeb,0xbe] +0x01,0x36,0xeb,0xbe + 
+# GFX11: s_and_not1_wrexec_b32 vcc_lo, s1 ; encoding: [0x01,0x36,0xea,0xbe] +0x01,0x36,0xea,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x37,0x80,0xbe] +0xf0,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], 0 ; encoding: [0x80,0x37,0x80,0xbe] +0x80,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x37,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x37,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_not1_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x37,0x80,0xbe] +0xc1,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x37,0x80,0xbe] +0xf7,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], exec ; encoding: [0x7e,0x37,0x80,0xbe] +0x7e,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x37,0x80,0xbe] +0x66,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x37,0x80,0xbe] +0x02,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[0:1], vcc ; encoding: [0x6a,0x37,0x80,0xbe] +0x6a,0x37,0x80,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x37,0xe8,0xbe] +0x66,0x37,0xe8,0xbe + +# GFX11: s_and_not1_wrexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x37,0xe8,0xbe] +0x02,0x37,0xe8,0xbe + +# GFX11: s_and_not1_wrexec_b64 vcc, s[2:3] ; encoding: [0x02,0x37,0xea,0xbe] +0x02,0x37,0xea,0xbe + +# GFX11: s_and_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x20,0x80,0xbe] +0xf0,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, 0 ; encoding: [0x80,0x20,0x80,0xbe] +0x80,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x20,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x20,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x20,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x20,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_saveexec_b32 s0, -1 ; encoding: [0xc1,0x20,0x80,0xbe] +0xc1,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x20,0x80,0xbe] +0xf7,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x20,0x80,0xbe] +0x7f,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x20,0x80,0xbe] +0x7e,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, m0 ; encoding: [0x7d,0x20,0x80,0xbe] +0x7d,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, s104 ; encoding: [0x68,0x20,0x80,0xbe] +0x68,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, s1 ; encoding: [0x01,0x20,0x80,0xbe] +0x01,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x20,0x80,0xbe] +0x6b,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x20,0x80,0xbe] +0x6a,0x20,0x80,0xbe + +# GFX11: s_and_saveexec_b32 s105, s104 ; encoding: [0x68,0x20,0xe9,0xbe] +0x68,0x20,0xe9,0xbe + +# GFX11: s_and_saveexec_b32 s105, s1 ; encoding: [0x01,0x20,0xe9,0xbe] +0x01,0x20,0xe9,0xbe + +# GFX11: s_and_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x20,0xeb,0xbe] +0x01,0x20,0xeb,0xbe + +# GFX11: s_and_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x20,0xea,0xbe] +0x01,0x20,0xea,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x21,0x80,0xbe] +0xf0,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x21,0x80,0xbe] +0x80,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x21,0x80,0xbe,0x73,0x72,0x71,0x3f] 
+0xff,0x21,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_and_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x21,0x80,0xbe] +0xc1,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x21,0x80,0xbe] +0xf7,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x21,0x80,0xbe] +0x7e,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x21,0x80,0xbe] +0x66,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x21,0x80,0xbe] +0x02,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x21,0x80,0xbe] +0x6a,0x21,0x80,0xbe + +# GFX11: s_and_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x21,0xe8,0xbe] +0x66,0x21,0xe8,0xbe + +# GFX11: s_and_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x21,0xe8,0xbe] +0x02,0x21,0xe8,0xbe + +# GFX11: s_and_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x21,0xea,0xbe] +0x02,0x21,0xea,0xbe + +# GFX11: s_ashr_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x86] +0x01,0x02,0x7f,0x86 + +# GFX11: s_ashr_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x86] +0x01,0x02,0x7e,0x86 + +# GFX11: s_ashr_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x86] +0x01,0x02,0x7d,0x86 + +# GFX11: s_ashr_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x86] +0xf0,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x86] +0x80,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x86,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x86,0x73,0x72,0x71,0x3f + +# GFX11: s_ashr_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x86,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x86,0x56,0x34,0x12,0xaf + +# GFX11: s_ashr_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x86] +0xc1,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x86] +0xf7,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x86] +0x7f,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x86] +0x7e,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x86] +0x7d,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x86] +0x68,0x67,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x86] +0x68,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x86] +0x01,0xf0,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x86] +0x01,0x80,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x86,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x86,0x73,0x72,0x71,0x3f + +# GFX11: s_ashr_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x86,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x86,0x56,0x34,0x12,0xaf + +# GFX11: s_ashr_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x86] +0x01,0xc1,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x86] +0x01,0xf7,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x86] +0x01,0x7f,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x86] +0x01,0x7e,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x86] +0x01,0x7d,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x86] +0x01,0x67,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x86] 
+0x01,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x86] +0x01,0x6b,0x00,0x86 + +# GFX11: s_ashr_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x86] +0x01,0x6a,0x00,0x86 + +# GFX11: s_ashr_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x86] +0x6b,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x86] +0x6a,0x02,0x00,0x86 + +# GFX11: s_ashr_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x86] +0x68,0x67,0x69,0x86 + +# GFX11: s_ashr_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x86] +0x68,0x02,0x69,0x86 + +# GFX11: s_ashr_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x86] +0x01,0x67,0x69,0x86 + +# GFX11: s_ashr_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x86] +0x01,0x02,0x69,0x86 + +# GFX11: s_ashr_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x86] +0x01,0x02,0x6b,0x86 + +# GFX11: s_ashr_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x86] +0x01,0x02,0x6a,0x86 + +# GFX11: s_ashr_i64 exec, s[2:3], s4 ; encoding: [0x02,0x04,0xfe,0x86] +0x02,0x04,0xfe,0x86 + +# GFX11: s_ashr_i64 s[0:1], 0.5, s4 ; encoding: [0xf0,0x04,0x80,0x86] +0xf0,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], 0, s4 ; encoding: [0x80,0x04,0x80,0x86] +0x80,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], 0x3f717273, s4 ; encoding: [0xff,0x04,0x80,0x86,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x86,0x73,0x72,0x71,0x3f + +# GFX11: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf + +# GFX11: s_ashr_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x86] +0xc1,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], -4.0, s4 ; encoding: [0xf7,0x04,0x80,0x86] +0xf7,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], exec, s4 ; encoding: [0x7e,0x04,0x80,0x86] +0x7e,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[102:103], s100 ; encoding: [0x66,0x64,0x80,0x86] +0x66,0x64,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[102:103], s4 ; encoding: [0x66,0x04,0x80,0x86] +0x66,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x86] +0x02,0xf0,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x86] +0x02,0x80,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x86,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x86,0x73,0x72,0x71,0x3f + +# GFX11: s_ashr_i64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x86,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x86,0x56,0x34,0x12,0xaf + +# GFX11: s_ashr_i64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x86] +0x02,0xc1,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x86] +0x02,0xf7,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x86] +0x02,0x7e,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], s100 ; encoding: [0x02,0x64,0x80,0x86] +0x02,0x64,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], s4 ; encoding: [0x02,0x04,0x80,0x86] +0x02,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], s[2:3], vcc_lo ; encoding: [0x02,0x6a,0x80,0x86] +0x02,0x6a,0x80,0x86 + +# GFX11: s_ashr_i64 s[0:1], vcc, s4 ; encoding: [0x6a,0x04,0x80,0x86] +0x6a,0x04,0x80,0x86 + +# GFX11: s_ashr_i64 s[104:105], s[102:103], s100 ; encoding: [0x66,0x64,0xe8,0x86] +0x66,0x64,0xe8,0x86 + +# GFX11: s_ashr_i64 s[104:105], s[102:103], s4 ; encoding: [0x66,0x04,0xe8,0x86] +0x66,0x04,0xe8,0x86 + +# GFX11: s_ashr_i64 s[104:105], s[2:3], s100 ; encoding: [0x02,0x64,0xe8,0x86] +0x02,0x64,0xe8,0x86 + +# GFX11: s_ashr_i64 s[104:105], s[2:3], s4 ; encoding: 
[0x02,0x04,0xe8,0x86] +0x02,0x04,0xe8,0x86 + +# GFX11: s_ashr_i64 vcc, s[2:3], s4 ; encoding: [0x02,0x04,0xea,0x86] +0x02,0x04,0xea,0x86 + +# GFX11: s_barrier ; encoding: [0x00,0x00,0xbd,0xbf] +0x00,0x00,0xbd,0xbf + +# GFX11: s_bcnt0_i32_b32 exec_hi, s1 ; encoding: [0x01,0x16,0xff,0xbe] +0x01,0x16,0xff,0xbe + +# GFX11: s_bcnt0_i32_b32 exec_lo, s1 ; encoding: [0x01,0x16,0xfe,0xbe] +0x01,0x16,0xfe,0xbe + +# GFX11: s_bcnt0_i32_b32 m0, s1 ; encoding: [0x01,0x16,0xfd,0xbe] +0x01,0x16,0xfd,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, 0.5 ; encoding: [0xf0,0x16,0x80,0xbe] +0xf0,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, 0 ; encoding: [0x80,0x16,0x80,0xbe] +0x80,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, 0x3f717273 ; encoding: [0xff,0x16,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x16,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bcnt0_i32_b32 s0, 0xaf123456 ; encoding: [0xff,0x16,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x16,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bcnt0_i32_b32 s0, -1 ; encoding: [0xc1,0x16,0x80,0xbe] +0xc1,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, -4.0 ; encoding: [0xf7,0x16,0x80,0xbe] +0xf7,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, exec_hi ; encoding: [0x7f,0x16,0x80,0xbe] +0x7f,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, exec_lo ; encoding: [0x7e,0x16,0x80,0xbe] +0x7e,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, m0 ; encoding: [0x7d,0x16,0x80,0xbe] +0x7d,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, s104 ; encoding: [0x68,0x16,0x80,0xbe] +0x68,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, s1 ; encoding: [0x01,0x16,0x80,0xbe] +0x01,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, vcc_hi ; encoding: [0x6b,0x16,0x80,0xbe] +0x6b,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s0, vcc_lo ; encoding: [0x6a,0x16,0x80,0xbe] +0x6a,0x16,0x80,0xbe + +# GFX11: s_bcnt0_i32_b32 s105, s104 ; encoding: [0x68,0x16,0xe9,0xbe] +0x68,0x16,0xe9,0xbe + +# GFX11: s_bcnt0_i32_b32 s105, s1 ; encoding: [0x01,0x16,0xe9,0xbe] +0x01,0x16,0xe9,0xbe + +# GFX11: s_bcnt0_i32_b32 vcc_hi, s1 ; encoding: [0x01,0x16,0xeb,0xbe] +0x01,0x16,0xeb,0xbe + +# GFX11: s_bcnt0_i32_b32 vcc_lo, s1 ; encoding: [0x01,0x16,0xea,0xbe] +0x01,0x16,0xea,0xbe + +# GFX11: s_bcnt0_i32_b64 exec_hi, s[2:3] ; encoding: [0x02,0x17,0xff,0xbe] +0x02,0x17,0xff,0xbe + +# GFX11: s_bcnt0_i32_b64 exec_lo, s[2:3] ; encoding: [0x02,0x17,0xfe,0xbe] +0x02,0x17,0xfe,0xbe + +# GFX11: s_bcnt0_i32_b64 m0, s[2:3] ; encoding: [0x02,0x17,0xfd,0xbe] +0x02,0x17,0xfd,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, 0.5 ; encoding: [0xf0,0x17,0x80,0xbe] +0xf0,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, 0 ; encoding: [0x80,0x17,0x80,0xbe] +0x80,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, 0x3f717273 ; encoding: [0xff,0x17,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x17,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bcnt0_i32_b64 s0, -1 ; encoding: [0xc1,0x17,0x80,0xbe] +0xc1,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, -4.0 ; encoding: [0xf7,0x17,0x80,0xbe] +0xf7,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, exec ; encoding: [0x7e,0x17,0x80,0xbe] +0x7e,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, s[102:103] ; encoding: [0x66,0x17,0x80,0xbe] +0x66,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, s[2:3] ; encoding: [0x02,0x17,0x80,0xbe] +0x02,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s0, vcc ; encoding: [0x6a,0x17,0x80,0xbe] +0x6a,0x17,0x80,0xbe + +# GFX11: s_bcnt0_i32_b64 s105, s[102:103] ; encoding: [0x66,0x17,0xe9,0xbe] 
+0x66,0x17,0xe9,0xbe + +# GFX11: s_bcnt0_i32_b64 s105, s[2:3] ; encoding: [0x02,0x17,0xe9,0xbe] +0x02,0x17,0xe9,0xbe + +# GFX11: s_bcnt0_i32_b64 vcc_hi, s[2:3] ; encoding: [0x02,0x17,0xeb,0xbe] +0x02,0x17,0xeb,0xbe + +# GFX11: s_bcnt0_i32_b64 vcc_lo, s[2:3] ; encoding: [0x02,0x17,0xea,0xbe] +0x02,0x17,0xea,0xbe + +# GFX11: s_bcnt1_i32_b32 exec_hi, s1 ; encoding: [0x01,0x18,0xff,0xbe] +0x01,0x18,0xff,0xbe + +# GFX11: s_bcnt1_i32_b32 exec_lo, s1 ; encoding: [0x01,0x18,0xfe,0xbe] +0x01,0x18,0xfe,0xbe + +# GFX11: s_bcnt1_i32_b32 m0, s1 ; encoding: [0x01,0x18,0xfd,0xbe] +0x01,0x18,0xfd,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, 0.5 ; encoding: [0xf0,0x18,0x80,0xbe] +0xf0,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, 0 ; encoding: [0x80,0x18,0x80,0xbe] +0x80,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, 0x3f717273 ; encoding: [0xff,0x18,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x18,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bcnt1_i32_b32 s0, 0xaf123456 ; encoding: [0xff,0x18,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x18,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bcnt1_i32_b32 s0, -1 ; encoding: [0xc1,0x18,0x80,0xbe] +0xc1,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, -4.0 ; encoding: [0xf7,0x18,0x80,0xbe] +0xf7,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, exec_hi ; encoding: [0x7f,0x18,0x80,0xbe] +0x7f,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, exec_lo ; encoding: [0x7e,0x18,0x80,0xbe] +0x7e,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, m0 ; encoding: [0x7d,0x18,0x80,0xbe] +0x7d,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, s104 ; encoding: [0x68,0x18,0x80,0xbe] +0x68,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, s1 ; encoding: [0x01,0x18,0x80,0xbe] +0x01,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, vcc_hi ; encoding: [0x6b,0x18,0x80,0xbe] +0x6b,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s0, vcc_lo ; encoding: [0x6a,0x18,0x80,0xbe] +0x6a,0x18,0x80,0xbe + +# GFX11: s_bcnt1_i32_b32 s105, s104 ; encoding: [0x68,0x18,0xe9,0xbe] +0x68,0x18,0xe9,0xbe + +# GFX11: s_bcnt1_i32_b32 s105, s1 ; encoding: [0x01,0x18,0xe9,0xbe] +0x01,0x18,0xe9,0xbe + +# GFX11: s_bcnt1_i32_b32 vcc_hi, s1 ; encoding: [0x01,0x18,0xeb,0xbe] +0x01,0x18,0xeb,0xbe + +# GFX11: s_bcnt1_i32_b32 vcc_lo, s1 ; encoding: [0x01,0x18,0xea,0xbe] +0x01,0x18,0xea,0xbe + +# GFX11: s_bcnt1_i32_b64 exec_hi, s[2:3] ; encoding: [0x02,0x19,0xff,0xbe] +0x02,0x19,0xff,0xbe + +# GFX11: s_bcnt1_i32_b64 exec_lo, s[2:3] ; encoding: [0x02,0x19,0xfe,0xbe] +0x02,0x19,0xfe,0xbe + +# GFX11: s_bcnt1_i32_b64 m0, s[2:3] ; encoding: [0x02,0x19,0xfd,0xbe] +0x02,0x19,0xfd,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, 0.5 ; encoding: [0xf0,0x19,0x80,0xbe] +0xf0,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, 0 ; encoding: [0x80,0x19,0x80,0xbe] +0x80,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, 0x3f717273 ; encoding: [0xff,0x19,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x19,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bcnt1_i32_b64 s0, -1 ; encoding: [0xc1,0x19,0x80,0xbe] +0xc1,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, -4.0 ; encoding: [0xf7,0x19,0x80,0xbe] +0xf7,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, exec ; encoding: [0x7e,0x19,0x80,0xbe] +0x7e,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, s[102:103] ; encoding: [0x66,0x19,0x80,0xbe] +0x66,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, s[2:3] ; encoding: [0x02,0x19,0x80,0xbe] +0x02,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s0, vcc ; encoding: [0x6a,0x19,0x80,0xbe] 
+0x6a,0x19,0x80,0xbe + +# GFX11: s_bcnt1_i32_b64 s105, s[102:103] ; encoding: [0x66,0x19,0xe9,0xbe] +0x66,0x19,0xe9,0xbe + +# GFX11: s_bcnt1_i32_b64 s105, s[2:3] ; encoding: [0x02,0x19,0xe9,0xbe] +0x02,0x19,0xe9,0xbe + +# GFX11: s_bcnt1_i32_b64 vcc_hi, s[2:3] ; encoding: [0x02,0x19,0xeb,0xbe] +0x02,0x19,0xeb,0xbe + +# GFX11: s_bcnt1_i32_b64 vcc_lo, s[2:3] ; encoding: [0x02,0x19,0xea,0xbe] +0x02,0x19,0xea,0xbe + +# GFX11: s_bfe_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x93] +0x01,0x02,0xff,0x93 + +# GFX11: s_bfe_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x93] +0x01,0x02,0xfe,0x93 + +# GFX11: s_bfe_i32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x93] +0x01,0x02,0xfd,0x93 + +# GFX11: s_bfe_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x93] +0xf0,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x93] +0x80,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x93,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x93,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x93,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x93,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x93] +0xc1,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x93] +0xf7,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x93] +0x7f,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x93] +0x7e,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x93] +0x7d,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x93] +0x68,0x67,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x93] +0x68,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x93] +0x01,0xf0,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x93] +0x01,0x80,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x93,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x93,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x93,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x93,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x93] +0x01,0xc1,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x93] +0x01,0xf7,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x93] +0x01,0x7f,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x93] +0x01,0x7e,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x93] +0x01,0x7d,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x93] +0x01,0x67,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x93] +0x01,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x93] +0x01,0x6b,0x80,0x93 + +# GFX11: s_bfe_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x93] +0x01,0x6a,0x80,0x93 + +# GFX11: s_bfe_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x93] +0x6b,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x93] +0x6a,0x02,0x80,0x93 + +# GFX11: s_bfe_i32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x93] +0x68,0x67,0xe9,0x93 + +# GFX11: s_bfe_i32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x93] +0x68,0x02,0xe9,0x93 + +# GFX11: s_bfe_i32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x93] +0x01,0x67,0xe9,0x93 + +# GFX11: s_bfe_i32 s105, s1, s2 ; encoding: 
[0x01,0x02,0xe9,0x93] +0x01,0x02,0xe9,0x93 + +# GFX11: s_bfe_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x93] +0x01,0x02,0xeb,0x93 + +# GFX11: s_bfe_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x93] +0x01,0x02,0xea,0x93 + +# GFX11: s_bfe_i64 exec, s[2:3], s4 ; encoding: [0x02,0x04,0xfe,0x94] +0x02,0x04,0xfe,0x94 + +# GFX11: s_bfe_i64 s[0:1], 0.5, s4 ; encoding: [0xf0,0x04,0x80,0x94] +0xf0,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], 0, s4 ; encoding: [0x80,0x04,0x80,0x94] +0x80,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], 0x3f717273, s4 ; encoding: [0xff,0x04,0x80,0x94,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x94,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x94] +0xc1,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], -4.0, s4 ; encoding: [0xf7,0x04,0x80,0x94] +0xf7,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], exec, s4 ; encoding: [0x7e,0x04,0x80,0x94] +0x7e,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[102:103], s100 ; encoding: [0x66,0x64,0x80,0x94] +0x66,0x64,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[102:103], s4 ; encoding: [0x66,0x04,0x80,0x94] +0x66,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x94] +0x02,0xf0,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x94] +0x02,0x80,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x94,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x94,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_i64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x94,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x94,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_i64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x94] +0x02,0xc1,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x94] +0x02,0xf7,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x94] +0x02,0x7e,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], s100 ; encoding: [0x02,0x64,0x80,0x94] +0x02,0x64,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], s4 ; encoding: [0x02,0x04,0x80,0x94] +0x02,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], s[2:3], vcc_lo ; encoding: [0x02,0x6a,0x80,0x94] +0x02,0x6a,0x80,0x94 + +# GFX11: s_bfe_i64 s[0:1], vcc, s4 ; encoding: [0x6a,0x04,0x80,0x94] +0x6a,0x04,0x80,0x94 + +# GFX11: s_bfe_i64 s[104:105], s[102:103], s100 ; encoding: [0x66,0x64,0xe8,0x94] +0x66,0x64,0xe8,0x94 + +# GFX11: s_bfe_i64 s[104:105], s[102:103], s4 ; encoding: [0x66,0x04,0xe8,0x94] +0x66,0x04,0xe8,0x94 + +# GFX11: s_bfe_i64 s[104:105], s[2:3], s100 ; encoding: [0x02,0x64,0xe8,0x94] +0x02,0x64,0xe8,0x94 + +# GFX11: s_bfe_i64 s[104:105], s[2:3], s4 ; encoding: [0x02,0x04,0xe8,0x94] +0x02,0x04,0xe8,0x94 + +# GFX11: s_bfe_i64 vcc, s[2:3], s4 ; encoding: [0x02,0x04,0xea,0x94] +0x02,0x04,0xea,0x94 + +# GFX11: s_bfe_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x93] +0x01,0x02,0x7f,0x93 + +# GFX11: s_bfe_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x93] +0x01,0x02,0x7e,0x93 + +# GFX11: s_bfe_u32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x93] +0x01,0x02,0x7d,0x93 + +# GFX11: s_bfe_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x93] +0xf0,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x93] +0x80,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x93,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x93,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_u32 s0, 
0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x93,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x93,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x93] +0xc1,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x93] +0xf7,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x93] +0x7f,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x93] +0x7e,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x93] +0x7d,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x93] +0x68,0x67,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x93] +0x68,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x93] +0x01,0xf0,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x93] +0x01,0x80,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x93,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x93,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x93,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x93,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x93] +0x01,0xc1,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x93] +0x01,0xf7,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x93] +0x01,0x7f,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x93] +0x01,0x7e,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x93] +0x01,0x7d,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x93] +0x01,0x67,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x93] +0x01,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x93] +0x01,0x6b,0x00,0x93 + +# GFX11: s_bfe_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x93] +0x01,0x6a,0x00,0x93 + +# GFX11: s_bfe_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x93] +0x6b,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x93] +0x6a,0x02,0x00,0x93 + +# GFX11: s_bfe_u32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x93] +0x68,0x67,0x69,0x93 + +# GFX11: s_bfe_u32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x93] +0x68,0x02,0x69,0x93 + +# GFX11: s_bfe_u32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x93] +0x01,0x67,0x69,0x93 + +# GFX11: s_bfe_u32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x93] +0x01,0x02,0x69,0x93 + +# GFX11: s_bfe_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x93] +0x01,0x02,0x6b,0x93 + +# GFX11: s_bfe_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x93] +0x01,0x02,0x6a,0x93 + +# GFX11: s_bfe_u64 exec, s[2:3], s4 ; encoding: [0x02,0x04,0x7e,0x94] +0x02,0x04,0x7e,0x94 + +# GFX11: s_bfe_u64 s[0:1], 0.5, s4 ; encoding: [0xf0,0x04,0x00,0x94] +0xf0,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], 0, s4 ; encoding: [0x80,0x04,0x00,0x94] +0x80,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], 0x3f717273, s4 ; encoding: [0xff,0x04,0x00,0x94,0x73,0x72,0x71,0x3f] +0xff,0x04,0x00,0x94,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf] +0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_u64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x00,0x94] +0xc1,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], -4.0, s4 ; encoding: [0xf7,0x04,0x00,0x94] +0xf7,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], exec, s4 ; encoding: 
[0x7e,0x04,0x00,0x94] +0x7e,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[102:103], s100 ; encoding: [0x66,0x64,0x00,0x94] +0x66,0x64,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[102:103], s4 ; encoding: [0x66,0x04,0x00,0x94] +0x66,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x00,0x94] +0x02,0xf0,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x00,0x94] +0x02,0x80,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x00,0x94,0x73,0x72,0x71,0x3f] +0x02,0xff,0x00,0x94,0x73,0x72,0x71,0x3f + +# GFX11: s_bfe_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0x94,0x56,0x34,0x12,0xaf] +0x02,0xff,0x00,0x94,0x56,0x34,0x12,0xaf + +# GFX11: s_bfe_u64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x00,0x94] +0x02,0xc1,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x00,0x94] +0x02,0xf7,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x00,0x94] +0x02,0x7e,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], s100 ; encoding: [0x02,0x64,0x00,0x94] +0x02,0x64,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], s4 ; encoding: [0x02,0x04,0x00,0x94] +0x02,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], s[2:3], vcc_lo ; encoding: [0x02,0x6a,0x00,0x94] +0x02,0x6a,0x00,0x94 + +# GFX11: s_bfe_u64 s[0:1], vcc, s4 ; encoding: [0x6a,0x04,0x00,0x94] +0x6a,0x04,0x00,0x94 + +# GFX11: s_bfe_u64 s[104:105], s[102:103], s100 ; encoding: [0x66,0x64,0x68,0x94] +0x66,0x64,0x68,0x94 + +# GFX11: s_bfe_u64 s[104:105], s[102:103], s4 ; encoding: [0x66,0x04,0x68,0x94] +0x66,0x04,0x68,0x94 + +# GFX11: s_bfe_u64 s[104:105], s[2:3], s100 ; encoding: [0x02,0x64,0x68,0x94] +0x02,0x64,0x68,0x94 + +# GFX11: s_bfe_u64 s[104:105], s[2:3], s4 ; encoding: [0x02,0x04,0x68,0x94] +0x02,0x04,0x68,0x94 + +# GFX11: s_bfe_u64 vcc, s[2:3], s4 ; encoding: [0x02,0x04,0x6a,0x94] +0x02,0x04,0x6a,0x94 + +# GFX11: s_bfm_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x95] +0x01,0x02,0x7f,0x95 + +# GFX11: s_bfm_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x95] +0x01,0x02,0x7e,0x95 + +# GFX11: s_bfm_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x95] +0x01,0x02,0x7d,0x95 + +# GFX11: s_bfm_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x95] +0xf0,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x95] +0x80,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x95,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x95,0x73,0x72,0x71,0x3f + +# GFX11: s_bfm_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x95,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x95,0x56,0x34,0x12,0xaf + +# GFX11: s_bfm_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x95] +0xc1,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x95] +0xf7,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x95] +0x7f,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x95] +0x7e,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x95] +0x7d,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x95] +0x68,0x67,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x95] +0x68,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x95] +0x01,0xf0,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x95] +0x01,0x80,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x95,0x73,0x72,0x71,0x3f] 
+0x01,0xff,0x00,0x95,0x73,0x72,0x71,0x3f + +# GFX11: s_bfm_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x95,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x95,0x56,0x34,0x12,0xaf + +# GFX11: s_bfm_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x95] +0x01,0xc1,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x95] +0x01,0xf7,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x95] +0x01,0x7f,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x95] +0x01,0x7e,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x95] +0x01,0x7d,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x95] +0x01,0x67,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x95] +0x01,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x95] +0x01,0x6b,0x00,0x95 + +# GFX11: s_bfm_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x95] +0x01,0x6a,0x00,0x95 + +# GFX11: s_bfm_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x95] +0x6b,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x95] +0x6a,0x02,0x00,0x95 + +# GFX11: s_bfm_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x95] +0x68,0x67,0x69,0x95 + +# GFX11: s_bfm_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x95] +0x68,0x02,0x69,0x95 + +# GFX11: s_bfm_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x95] +0x01,0x67,0x69,0x95 + +# GFX11: s_bfm_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x95] +0x01,0x02,0x69,0x95 + +# GFX11: s_bfm_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x95] +0x01,0x02,0x6b,0x95 + +# GFX11: s_bfm_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x95] +0x01,0x02,0x6a,0x95 + +# GFX11: s_bfm_b64 exec, s2, s3 ; encoding: [0x02,0x03,0xfe,0x95] +0x02,0x03,0xfe,0x95 + +# GFX11: s_bfm_b64 s[0:1], 0.5, s3 ; encoding: [0xf0,0x03,0x80,0x95] +0xf0,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], 0, s3 ; encoding: [0x80,0x03,0x80,0x95] +0x80,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], 0x3f717273, s3 ; encoding: [0xff,0x03,0x80,0x95,0x73,0x72,0x71,0x3f] +0xff,0x03,0x80,0x95,0x73,0x72,0x71,0x3f + +# GFX11: s_bfm_b64 s[0:1], 0xaf123456, s3 ; encoding: [0xff,0x03,0x80,0x95,0x56,0x34,0x12,0xaf] +0xff,0x03,0x80,0x95,0x56,0x34,0x12,0xaf + +# GFX11: s_bfm_b64 s[0:1], -1, s3 ; encoding: [0xc1,0x03,0x80,0x95] +0xc1,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], -4.0, s3 ; encoding: [0xf7,0x03,0x80,0x95] +0xf7,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], exec_hi, s3 ; encoding: [0x7f,0x03,0x80,0x95] +0x7f,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], exec_lo, s3 ; encoding: [0x7e,0x03,0x80,0x95] +0x7e,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], m0, s3 ; encoding: [0x7d,0x03,0x80,0x95] +0x7d,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s104, s103 ; encoding: [0x68,0x67,0x80,0x95] +0x68,0x67,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s104, s3 ; encoding: [0x68,0x03,0x80,0x95] +0x68,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, 0.5 ; encoding: [0x02,0xf0,0x80,0x95] +0x02,0xf0,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, 0 ; encoding: [0x02,0x80,0x80,0x95] +0x02,0x80,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, 0x3f717273 ; encoding: [0x02,0xff,0x80,0x95,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x95,0x73,0x72,0x71,0x3f + +# GFX11: s_bfm_b64 s[0:1], s2, 0xaf123456 ; encoding: [0x02,0xff,0x80,0x95,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x95,0x56,0x34,0x12,0xaf + +# GFX11: s_bfm_b64 s[0:1], s2, -1 ; encoding: [0x02,0xc1,0x80,0x95] +0x02,0xc1,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, -4.0 ; encoding: 
[0x02,0xf7,0x80,0x95] +0x02,0xf7,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, exec_hi ; encoding: [0x02,0x7f,0x80,0x95] +0x02,0x7f,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, exec_lo ; encoding: [0x02,0x7e,0x80,0x95] +0x02,0x7e,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, m0 ; encoding: [0x02,0x7d,0x80,0x95] +0x02,0x7d,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, s103 ; encoding: [0x02,0x67,0x80,0x95] +0x02,0x67,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, s3 ; encoding: [0x02,0x03,0x80,0x95] +0x02,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, vcc_hi ; encoding: [0x02,0x6b,0x80,0x95] +0x02,0x6b,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], s2, vcc_lo ; encoding: [0x02,0x6a,0x80,0x95] +0x02,0x6a,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], vcc_hi, s3 ; encoding: [0x6b,0x03,0x80,0x95] +0x6b,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[0:1], vcc_lo, s3 ; encoding: [0x6a,0x03,0x80,0x95] +0x6a,0x03,0x80,0x95 + +# GFX11: s_bfm_b64 s[104:105], s103, s102 ; encoding: [0x67,0x66,0xe8,0x95] +0x67,0x66,0xe8,0x95 + +# GFX11: s_bfm_b64 s[104:105], s104, s3 ; encoding: [0x68,0x03,0xe8,0x95] +0x68,0x03,0xe8,0x95 + +# GFX11: s_bfm_b64 s[104:105], s2, s103 ; encoding: [0x02,0x67,0xe8,0x95] +0x02,0x67,0xe8,0x95 + +# GFX11: s_bfm_b64 s[104:105], s2, s3 ; encoding: [0x02,0x03,0xe8,0x95] +0x02,0x03,0xe8,0x95 + +# GFX11: s_bfm_b64 vcc, s2, s3 ; encoding: [0x02,0x03,0xea,0x95] +0x02,0x03,0xea,0x95 + +# GFX11: s_bitcmp0_b32 exec_hi, s1 ; encoding: [0x7f,0x01,0x0c,0xbf] +0x7f,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 exec_lo, s1 ; encoding: [0x7e,0x01,0x0c,0xbf] +0x7e,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 m0, s1 ; encoding: [0x7d,0x01,0x0c,0xbf] +0x7d,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, 0.5 ; encoding: [0x00,0xf0,0x0c,0xbf] +0x00,0xf0,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, 0 ; encoding: [0x00,0x80,0x0c,0xbf] +0x00,0x80,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x0c,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0c,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_bitcmp0_b32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x0c,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0c,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_bitcmp0_b32 s0, -1 ; encoding: [0x00,0xc1,0x0c,0xbf] +0x00,0xc1,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, -4.0 ; encoding: [0x00,0xf7,0x0c,0xbf] +0x00,0xf7,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, exec_hi ; encoding: [0x00,0x7f,0x0c,0xbf] +0x00,0x7f,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, exec_lo ; encoding: [0x00,0x7e,0x0c,0xbf] +0x00,0x7e,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, m0 ; encoding: [0x00,0x7d,0x0c,0xbf] +0x00,0x7d,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, s104 ; encoding: [0x00,0x68,0x0c,0xbf] +0x00,0x68,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, s1 ; encoding: [0x00,0x01,0x0c,0xbf] +0x00,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, vcc_hi ; encoding: [0x00,0x6b,0x0c,0xbf] +0x00,0x6b,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s0, vcc_lo ; encoding: [0x00,0x6a,0x0c,0xbf] +0x00,0x6a,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s105, s104 ; encoding: [0x69,0x68,0x0c,0xbf] +0x69,0x68,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 s105, s1 ; encoding: [0x69,0x01,0x0c,0xbf] +0x69,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x0c,0xbf] +0x6b,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x0c,0xbf] +0x6a,0x01,0x0c,0xbf + +# GFX11: s_bitcmp0_b64 exec, s2 ; encoding: [0x7e,0x02,0x0e,0xbf] +0x7e,0x02,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], 0.5 ; encoding: [0x00,0xf0,0x0e,0xbf] +0x00,0xf0,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], 0 ; encoding: [0x00,0x80,0x0e,0xbf] 
+0x00,0x80,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], 0x3f717273 ; encoding: [0x00,0xff,0x0e,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0e,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_bitcmp0_b64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x0e,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0e,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_bitcmp0_b64 s[0:1], -1 ; encoding: [0x00,0xc1,0x0e,0xbf] +0x00,0xc1,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], -4.0 ; encoding: [0x00,0xf7,0x0e,0xbf] +0x00,0xf7,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], exec_hi ; encoding: [0x00,0x7f,0x0e,0xbf] +0x00,0x7f,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], exec_lo ; encoding: [0x00,0x7e,0x0e,0xbf] +0x00,0x7e,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], m0 ; encoding: [0x00,0x7d,0x0e,0xbf] +0x00,0x7d,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], s102 ; encoding: [0x00,0x66,0x0e,0xbf] +0x00,0x66,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], s2 ; encoding: [0x00,0x02,0x0e,0xbf] +0x00,0x02,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], vcc_hi ; encoding: [0x00,0x6b,0x0e,0xbf] +0x00,0x6b,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[0:1], vcc_lo ; encoding: [0x00,0x6a,0x0e,0xbf] +0x00,0x6a,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[104:105], s102 ; encoding: [0x68,0x66,0x0e,0xbf] +0x68,0x66,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 s[104:105], s2 ; encoding: [0x68,0x02,0x0e,0xbf] +0x68,0x02,0x0e,0xbf + +# GFX11: s_bitcmp0_b64 vcc, s2 ; encoding: [0x6a,0x02,0x0e,0xbf] +0x6a,0x02,0x0e,0xbf + +# GFX11: s_bitcmp1_b32 exec_hi, s1 ; encoding: [0x7f,0x01,0x0d,0xbf] +0x7f,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 exec_lo, s1 ; encoding: [0x7e,0x01,0x0d,0xbf] +0x7e,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 m0, s1 ; encoding: [0x7d,0x01,0x0d,0xbf] +0x7d,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, 0.5 ; encoding: [0x00,0xf0,0x0d,0xbf] +0x00,0xf0,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, 0 ; encoding: [0x00,0x80,0x0d,0xbf] +0x00,0x80,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x0d,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0d,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_bitcmp1_b32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x0d,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0d,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_bitcmp1_b32 s0, -1 ; encoding: [0x00,0xc1,0x0d,0xbf] +0x00,0xc1,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, -4.0 ; encoding: [0x00,0xf7,0x0d,0xbf] +0x00,0xf7,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, exec_hi ; encoding: [0x00,0x7f,0x0d,0xbf] +0x00,0x7f,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, exec_lo ; encoding: [0x00,0x7e,0x0d,0xbf] +0x00,0x7e,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, m0 ; encoding: [0x00,0x7d,0x0d,0xbf] +0x00,0x7d,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, s104 ; encoding: [0x00,0x68,0x0d,0xbf] +0x00,0x68,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, s1 ; encoding: [0x00,0x01,0x0d,0xbf] +0x00,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, vcc_hi ; encoding: [0x00,0x6b,0x0d,0xbf] +0x00,0x6b,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s0, vcc_lo ; encoding: [0x00,0x6a,0x0d,0xbf] +0x00,0x6a,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s105, s104 ; encoding: [0x69,0x68,0x0d,0xbf] +0x69,0x68,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 s105, s1 ; encoding: [0x69,0x01,0x0d,0xbf] +0x69,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x0d,0xbf] +0x6b,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x0d,0xbf] +0x6a,0x01,0x0d,0xbf + +# GFX11: s_bitcmp1_b64 exec, s2 ; encoding: [0x7e,0x02,0x0f,0xbf] +0x7e,0x02,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], 0.5 ; encoding: [0x00,0xf0,0x0f,0xbf] +0x00,0xf0,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], 0 
; encoding: [0x00,0x80,0x0f,0xbf] +0x00,0x80,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], 0x3f717273 ; encoding: [0x00,0xff,0x0f,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0f,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_bitcmp1_b64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x0f,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0f,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_bitcmp1_b64 s[0:1], -1 ; encoding: [0x00,0xc1,0x0f,0xbf] +0x00,0xc1,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], -4.0 ; encoding: [0x00,0xf7,0x0f,0xbf] +0x00,0xf7,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], exec_hi ; encoding: [0x00,0x7f,0x0f,0xbf] +0x00,0x7f,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], exec_lo ; encoding: [0x00,0x7e,0x0f,0xbf] +0x00,0x7e,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], m0 ; encoding: [0x00,0x7d,0x0f,0xbf] +0x00,0x7d,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], s102 ; encoding: [0x00,0x66,0x0f,0xbf] +0x00,0x66,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], s2 ; encoding: [0x00,0x02,0x0f,0xbf] +0x00,0x02,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], vcc_hi ; encoding: [0x00,0x6b,0x0f,0xbf] +0x00,0x6b,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[0:1], vcc_lo ; encoding: [0x00,0x6a,0x0f,0xbf] +0x00,0x6a,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[104:105], s102 ; encoding: [0x68,0x66,0x0f,0xbf] +0x68,0x66,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 s[104:105], s2 ; encoding: [0x68,0x02,0x0f,0xbf] +0x68,0x02,0x0f,0xbf + +# GFX11: s_bitcmp1_b64 vcc, s2 ; encoding: [0x6a,0x02,0x0f,0xbf] +0x6a,0x02,0x0f,0xbf + +# GFX11: s_bitreplicate_b64_b32 exec, s2 ; encoding: [0x02,0x14,0xfe,0xbe] +0x02,0x14,0xfe,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], 0.5 ; encoding: [0xf0,0x14,0x80,0xbe] +0xf0,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], 0 ; encoding: [0x80,0x14,0x80,0xbe] +0x80,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], 0x3f717273 ; encoding: [0xff,0x14,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x14,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bitreplicate_b64_b32 s[0:1], 0xaf123456 ; encoding: [0xff,0x14,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x14,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bitreplicate_b64_b32 s[0:1], -1 ; encoding: [0xc1,0x14,0x80,0xbe] +0xc1,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], -4.0 ; encoding: [0xf7,0x14,0x80,0xbe] +0xf7,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], exec_hi ; encoding: [0x7f,0x14,0x80,0xbe] +0x7f,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], exec_lo ; encoding: [0x7e,0x14,0x80,0xbe] +0x7e,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], m0 ; encoding: [0x7d,0x14,0x80,0xbe] +0x7d,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], s102 ; encoding: [0x66,0x14,0x80,0xbe] +0x66,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], s2 ; encoding: [0x02,0x14,0x80,0xbe] +0x02,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], vcc_hi ; encoding: [0x6b,0x14,0x80,0xbe] +0x6b,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[0:1], vcc_lo ; encoding: [0x6a,0x14,0x80,0xbe] +0x6a,0x14,0x80,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[104:105], s102 ; encoding: [0x66,0x14,0xe8,0xbe] +0x66,0x14,0xe8,0xbe + +# GFX11: s_bitreplicate_b64_b32 s[104:105], s2 ; encoding: [0x02,0x14,0xe8,0xbe] +0x02,0x14,0xe8,0xbe + +# GFX11: s_bitreplicate_b64_b32 vcc, s2 ; encoding: [0x02,0x14,0xea,0xbe] +0x02,0x14,0xea,0xbe + +# GFX11: s_bitset0_b32 exec_hi, s1 ; encoding: [0x01,0x10,0xff,0xbe] +0x01,0x10,0xff,0xbe + +# GFX11: s_bitset0_b32 exec_lo, s1 ; encoding: [0x01,0x10,0xfe,0xbe] +0x01,0x10,0xfe,0xbe + +# GFX11: s_bitset0_b32 m0, s1 ; encoding: 
[0x01,0x10,0xfd,0xbe] +0x01,0x10,0xfd,0xbe + +# GFX11: s_bitset0_b32 s0, 0.5 ; encoding: [0xf0,0x10,0x80,0xbe] +0xf0,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, 0 ; encoding: [0x80,0x10,0x80,0xbe] +0x80,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, 0x3f717273 ; encoding: [0xff,0x10,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x10,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bitset0_b32 s0, 0xaf123456 ; encoding: [0xff,0x10,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x10,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bitset0_b32 s0, -1 ; encoding: [0xc1,0x10,0x80,0xbe] +0xc1,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, -4.0 ; encoding: [0xf7,0x10,0x80,0xbe] +0xf7,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, exec_hi ; encoding: [0x7f,0x10,0x80,0xbe] +0x7f,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, exec_lo ; encoding: [0x7e,0x10,0x80,0xbe] +0x7e,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, m0 ; encoding: [0x7d,0x10,0x80,0xbe] +0x7d,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, s104 ; encoding: [0x68,0x10,0x80,0xbe] +0x68,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, s1 ; encoding: [0x01,0x10,0x80,0xbe] +0x01,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, vcc_hi ; encoding: [0x6b,0x10,0x80,0xbe] +0x6b,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s0, vcc_lo ; encoding: [0x6a,0x10,0x80,0xbe] +0x6a,0x10,0x80,0xbe + +# GFX11: s_bitset0_b32 s105, s104 ; encoding: [0x68,0x10,0xe9,0xbe] +0x68,0x10,0xe9,0xbe + +# GFX11: s_bitset0_b32 s105, s1 ; encoding: [0x01,0x10,0xe9,0xbe] +0x01,0x10,0xe9,0xbe + +# GFX11: s_bitset0_b32 vcc_hi, s1 ; encoding: [0x01,0x10,0xeb,0xbe] +0x01,0x10,0xeb,0xbe + +# GFX11: s_bitset0_b32 vcc_lo, s1 ; encoding: [0x01,0x10,0xea,0xbe] +0x01,0x10,0xea,0xbe + +# GFX11: s_bitset0_b64 exec, s2 ; encoding: [0x02,0x11,0xfe,0xbe] +0x02,0x11,0xfe,0xbe + +# GFX11: s_bitset0_b64 s[0:1], 0.5 ; encoding: [0xf0,0x11,0x80,0xbe] +0xf0,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], 0 ; encoding: [0x80,0x11,0x80,0xbe] +0x80,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x11,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x11,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bitset0_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x11,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x11,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bitset0_b64 s[0:1], -1 ; encoding: [0xc1,0x11,0x80,0xbe] +0xc1,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], -4.0 ; encoding: [0xf7,0x11,0x80,0xbe] +0xf7,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], exec_hi ; encoding: [0x7f,0x11,0x80,0xbe] +0x7f,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], exec_lo ; encoding: [0x7e,0x11,0x80,0xbe] +0x7e,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], m0 ; encoding: [0x7d,0x11,0x80,0xbe] +0x7d,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], s102 ; encoding: [0x66,0x11,0x80,0xbe] +0x66,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], s2 ; encoding: [0x02,0x11,0x80,0xbe] +0x02,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], vcc_hi ; encoding: [0x6b,0x11,0x80,0xbe] +0x6b,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[0:1], vcc_lo ; encoding: [0x6a,0x11,0x80,0xbe] +0x6a,0x11,0x80,0xbe + +# GFX11: s_bitset0_b64 s[104:105], s102 ; encoding: [0x66,0x11,0xe8,0xbe] +0x66,0x11,0xe8,0xbe + +# GFX11: s_bitset0_b64 s[104:105], s2 ; encoding: [0x02,0x11,0xe8,0xbe] +0x02,0x11,0xe8,0xbe + +# GFX11: s_bitset0_b64 vcc, s2 ; encoding: [0x02,0x11,0xea,0xbe] +0x02,0x11,0xea,0xbe + +# GFX11: s_bitset1_b32 exec_hi, s1 ; encoding: [0x01,0x12,0xff,0xbe] +0x01,0x12,0xff,0xbe + +# GFX11: s_bitset1_b32 exec_lo, s1 ; encoding: [0x01,0x12,0xfe,0xbe] +0x01,0x12,0xfe,0xbe + +# 
GFX11: s_bitset1_b32 m0, s1 ; encoding: [0x01,0x12,0xfd,0xbe] +0x01,0x12,0xfd,0xbe + +# GFX11: s_bitset1_b32 s0, 0.5 ; encoding: [0xf0,0x12,0x80,0xbe] +0xf0,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, 0 ; encoding: [0x80,0x12,0x80,0xbe] +0x80,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, 0x3f717273 ; encoding: [0xff,0x12,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x12,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bitset1_b32 s0, 0xaf123456 ; encoding: [0xff,0x12,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x12,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bitset1_b32 s0, -1 ; encoding: [0xc1,0x12,0x80,0xbe] +0xc1,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, -4.0 ; encoding: [0xf7,0x12,0x80,0xbe] +0xf7,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, exec_hi ; encoding: [0x7f,0x12,0x80,0xbe] +0x7f,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, exec_lo ; encoding: [0x7e,0x12,0x80,0xbe] +0x7e,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, m0 ; encoding: [0x7d,0x12,0x80,0xbe] +0x7d,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, s104 ; encoding: [0x68,0x12,0x80,0xbe] +0x68,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, s1 ; encoding: [0x01,0x12,0x80,0xbe] +0x01,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, vcc_hi ; encoding: [0x6b,0x12,0x80,0xbe] +0x6b,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s0, vcc_lo ; encoding: [0x6a,0x12,0x80,0xbe] +0x6a,0x12,0x80,0xbe + +# GFX11: s_bitset1_b32 s105, s104 ; encoding: [0x68,0x12,0xe9,0xbe] +0x68,0x12,0xe9,0xbe + +# GFX11: s_bitset1_b32 s105, s1 ; encoding: [0x01,0x12,0xe9,0xbe] +0x01,0x12,0xe9,0xbe + +# GFX11: s_bitset1_b32 vcc_hi, s1 ; encoding: [0x01,0x12,0xeb,0xbe] +0x01,0x12,0xeb,0xbe + +# GFX11: s_bitset1_b32 vcc_lo, s1 ; encoding: [0x01,0x12,0xea,0xbe] +0x01,0x12,0xea,0xbe + +# GFX11: s_bitset1_b64 exec, s2 ; encoding: [0x02,0x13,0xfe,0xbe] +0x02,0x13,0xfe,0xbe + +# GFX11: s_bitset1_b64 s[0:1], 0.5 ; encoding: [0xf0,0x13,0x80,0xbe] +0xf0,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], 0 ; encoding: [0x80,0x13,0x80,0xbe] +0x80,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x13,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x13,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_bitset1_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x13,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x13,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_bitset1_b64 s[0:1], -1 ; encoding: [0xc1,0x13,0x80,0xbe] +0xc1,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], -4.0 ; encoding: [0xf7,0x13,0x80,0xbe] +0xf7,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], exec_hi ; encoding: [0x7f,0x13,0x80,0xbe] +0x7f,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], exec_lo ; encoding: [0x7e,0x13,0x80,0xbe] +0x7e,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], m0 ; encoding: [0x7d,0x13,0x80,0xbe] +0x7d,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], s102 ; encoding: [0x66,0x13,0x80,0xbe] +0x66,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], s2 ; encoding: [0x02,0x13,0x80,0xbe] +0x02,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], vcc_hi ; encoding: [0x6b,0x13,0x80,0xbe] +0x6b,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[0:1], vcc_lo ; encoding: [0x6a,0x13,0x80,0xbe] +0x6a,0x13,0x80,0xbe + +# GFX11: s_bitset1_b64 s[104:105], s102 ; encoding: [0x66,0x13,0xe8,0xbe] +0x66,0x13,0xe8,0xbe + +# GFX11: s_bitset1_b64 s[104:105], s2 ; encoding: [0x02,0x13,0xe8,0xbe] +0x02,0x13,0xe8,0xbe + +# GFX11: s_bitset1_b64 vcc, s2 ; encoding: [0x02,0x13,0xea,0xbe] +0x02,0x13,0xea,0xbe + +# GFX11: s_branch 0 ; encoding: [0x00,0x00,0xa0,0xbf] +0x00,0x00,0xa0,0xbf + +# GFX11: s_branch 4660 ; encoding: [0x34,0x12,0xa0,0xbf] 
+0x34,0x12,0xa0,0xbf + +# GFX11: s_brev_b32 exec_hi, s1 ; encoding: [0x01,0x04,0xff,0xbe] +0x01,0x04,0xff,0xbe + +# GFX11: s_brev_b32 exec_lo, s1 ; encoding: [0x01,0x04,0xfe,0xbe] +0x01,0x04,0xfe,0xbe + +# GFX11: s_brev_b32 m0, s1 ; encoding: [0x01,0x04,0xfd,0xbe] +0x01,0x04,0xfd,0xbe + +# GFX11: s_brev_b32 s0, 0.5 ; encoding: [0xf0,0x04,0x80,0xbe] +0xf0,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, 0 ; encoding: [0x80,0x04,0x80,0xbe] +0x80,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, 0x3f717273 ; encoding: [0xff,0x04,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_brev_b32 s0, 0xaf123456 ; encoding: [0xff,0x04,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_brev_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe] +0xc1,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, -4.0 ; encoding: [0xf7,0x04,0x80,0xbe] +0xf7,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, exec_hi ; encoding: [0x7f,0x04,0x80,0xbe] +0x7f,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, exec_lo ; encoding: [0x7e,0x04,0x80,0xbe] +0x7e,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, m0 ; encoding: [0x7d,0x04,0x80,0xbe] +0x7d,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, s104 ; encoding: [0x68,0x04,0x80,0xbe] +0x68,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, s1 ; encoding: [0x01,0x04,0x80,0xbe] +0x01,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, vcc_hi ; encoding: [0x6b,0x04,0x80,0xbe] +0x6b,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s0, vcc_lo ; encoding: [0x6a,0x04,0x80,0xbe] +0x6a,0x04,0x80,0xbe + +# GFX11: s_brev_b32 s105, s104 ; encoding: [0x68,0x04,0xe9,0xbe] +0x68,0x04,0xe9,0xbe + +# GFX11: s_brev_b32 s105, s1 ; encoding: [0x01,0x04,0xe9,0xbe] +0x01,0x04,0xe9,0xbe + +# GFX11: s_brev_b32 vcc_hi, s1 ; encoding: [0x01,0x04,0xeb,0xbe] +0x01,0x04,0xeb,0xbe + +# GFX11: s_brev_b32 vcc_lo, s1 ; encoding: [0x01,0x04,0xea,0xbe] +0x01,0x04,0xea,0xbe + +# GFX11: s_brev_b64 exec, s[2:3] ; encoding: [0x02,0x05,0xfe,0xbe] +0x02,0x05,0xfe,0xbe + +# GFX11: s_brev_b64 s[0:1], 0.5 ; encoding: [0xf0,0x05,0x80,0xbe] +0xf0,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], 0 ; encoding: [0x80,0x05,0x80,0xbe] +0x80,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x05,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x05,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_brev_b64 s[0:1], -1 ; encoding: [0xc1,0x05,0x80,0xbe] +0xc1,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], -4.0 ; encoding: [0xf7,0x05,0x80,0xbe] +0xf7,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], exec ; encoding: [0x7e,0x05,0x80,0xbe] +0x7e,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], s[102:103] ; encoding: [0x66,0x05,0x80,0xbe] +0x66,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], s[2:3] ; encoding: [0x02,0x05,0x80,0xbe] +0x02,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[0:1], vcc ; encoding: [0x6a,0x05,0x80,0xbe] +0x6a,0x05,0x80,0xbe + +# GFX11: s_brev_b64 s[104:105], s[102:103] ; encoding: [0x66,0x05,0xe8,0xbe] +0x66,0x05,0xe8,0xbe + +# GFX11: s_brev_b64 s[104:105], s[2:3] ; encoding: [0x02,0x05,0xe8,0xbe] +0x02,0x05,0xe8,0xbe + +# GFX11: s_brev_b64 vcc, s[2:3] ; encoding: [0x02,0x05,0xea,0xbe] +0x02,0x05,0xea,0xbe + +# GFX11: s_call_b64 exec, 4660 ; encoding: [0x34,0x12,0x7e,0xba] +0x34,0x12,0x7e,0xba + +# GFX11: s_call_b64 s[0:1], 4660 ; encoding: [0x34,0x12,0x00,0xba] +0x34,0x12,0x00,0xba + +# GFX11: s_call_b64 s[104:105], 4660 ; encoding: [0x34,0x12,0x68,0xba] +0x34,0x12,0x68,0xba + +# GFX11: s_call_b64 vcc, 4660 ; 
encoding: [0x34,0x12,0x6a,0xba] +0x34,0x12,0x6a,0xba + +# GFX11: s_call_b64 null, 4660 ; encoding: [0x34,0x12,0x7c,0xba] +0x34,0x12,0x7c,0xba + +# GFX11: s_cbranch_cdbgsys 0 ; encoding: [0x00,0x00,0xa7,0xbf] +0x00,0x00,0xa7,0xbf + +# GFX11: s_cbranch_cdbgsys 4660 ; encoding: [0x34,0x12,0xa7,0xbf] +0x34,0x12,0xa7,0xbf + +# GFX11: s_cbranch_cdbgsys_and_user 0 ; encoding: [0x00,0x00,0xaa,0xbf] +0x00,0x00,0xaa,0xbf + +# GFX11: s_cbranch_cdbgsys_and_user 4660 ; encoding: [0x34,0x12,0xaa,0xbf] +0x34,0x12,0xaa,0xbf + +# GFX11: s_cbranch_cdbgsys_or_user 0 ; encoding: [0x00,0x00,0xa9,0xbf] +0x00,0x00,0xa9,0xbf + +# GFX11: s_cbranch_cdbgsys_or_user 4660 ; encoding: [0x34,0x12,0xa9,0xbf] +0x34,0x12,0xa9,0xbf + +# GFX11: s_cbranch_cdbguser 0 ; encoding: [0x00,0x00,0xa8,0xbf] +0x00,0x00,0xa8,0xbf + +# GFX11: s_cbranch_cdbguser 4660 ; encoding: [0x34,0x12,0xa8,0xbf] +0x34,0x12,0xa8,0xbf + +# GFX11: s_cbranch_execnz 0 ; encoding: [0x00,0x00,0xa6,0xbf] +0x00,0x00,0xa6,0xbf + +# GFX11: s_cbranch_execnz 4660 ; encoding: [0x34,0x12,0xa6,0xbf] +0x34,0x12,0xa6,0xbf + +# GFX11: s_cbranch_execz 0 ; encoding: [0x00,0x00,0xa5,0xbf] +0x00,0x00,0xa5,0xbf + +# GFX11: s_cbranch_execz 4660 ; encoding: [0x34,0x12,0xa5,0xbf] +0x34,0x12,0xa5,0xbf + +# GFX11: s_cbranch_scc0 0 ; encoding: [0x00,0x00,0xa1,0xbf] +0x00,0x00,0xa1,0xbf + +# GFX11: s_cbranch_scc0 4660 ; encoding: [0x34,0x12,0xa1,0xbf] +0x34,0x12,0xa1,0xbf + +# GFX11: s_cbranch_scc1 0 ; encoding: [0x00,0x00,0xa2,0xbf] +0x00,0x00,0xa2,0xbf + +# GFX11: s_cbranch_scc1 4660 ; encoding: [0x34,0x12,0xa2,0xbf] +0x34,0x12,0xa2,0xbf + +# GFX11: s_cbranch_vccnz 0 ; encoding: [0x00,0x00,0xa4,0xbf] +0x00,0x00,0xa4,0xbf + +# GFX11: s_cbranch_vccnz 4660 ; encoding: [0x34,0x12,0xa4,0xbf] +0x34,0x12,0xa4,0xbf + +# GFX11: s_cbranch_vccz 0 ; encoding: [0x00,0x00,0xa3,0xbf] +0x00,0x00,0xa3,0xbf + +# GFX11: s_cbranch_vccz 4660 ; encoding: [0x34,0x12,0xa3,0xbf] +0x34,0x12,0xa3,0xbf + +# GFX11: s_clause 0x0 ; encoding: [0x00,0x00,0x85,0xbf] +0x00,0x00,0x85,0xbf + +# GFX11: s_clause 0x1234 ; encoding: [0x34,0x12,0x85,0xbf] +0x34,0x12,0x85,0xbf + +# GFX11: s_clause 0xc1d1 ; encoding: [0xd1,0xc1,0x85,0xbf] +0xd1,0xc1,0x85,0xbf + +# GFX11: s_cls_i32 exec_hi, s1 ; encoding: [0x01,0x0c,0xff,0xbe] +0x01,0x0c,0xff,0xbe + +# GFX11: s_cls_i32 exec_lo, s1 ; encoding: [0x01,0x0c,0xfe,0xbe] +0x01,0x0c,0xfe,0xbe + +# GFX11: s_cls_i32_i64 exec_hi, s[2:3] ; encoding: [0x02,0x0d,0xff,0xbe] +0x02,0x0d,0xff,0xbe + +# GFX11: s_cls_i32_i64 exec_lo, s[2:3] ; encoding: [0x02,0x0d,0xfe,0xbe] +0x02,0x0d,0xfe,0xbe + +# GFX11: s_cls_i32_i64 m0, s[2:3] ; encoding: [0x02,0x0d,0xfd,0xbe] +0x02,0x0d,0xfd,0xbe + +# GFX11: s_cls_i32_i64 s0, 0.5 ; encoding: [0xf0,0x0d,0x80,0xbe] +0xf0,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, 0 ; encoding: [0x80,0x0d,0x80,0xbe] +0x80,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, 0x3f717273 ; encoding: [0xff,0x0d,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0d,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_cls_i32_i64 s0, -1 ; encoding: [0xc1,0x0d,0x80,0xbe] +0xc1,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, -4.0 ; encoding: [0xf7,0x0d,0x80,0xbe] +0xf7,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, exec ; encoding: [0x7e,0x0d,0x80,0xbe] +0x7e,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, s[102:103] ; encoding: [0x66,0x0d,0x80,0xbe] +0x66,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s0, s[2:3] ; encoding: [0x02,0x0d,0x80,0xbe] +0x02,0x0d,0x80,0xbe + +# GFX11: 
s_cls_i32_i64 s0, vcc ; encoding: [0x6a,0x0d,0x80,0xbe] +0x6a,0x0d,0x80,0xbe + +# GFX11: s_cls_i32_i64 s105, s[102:103] ; encoding: [0x66,0x0d,0xe9,0xbe] +0x66,0x0d,0xe9,0xbe + +# GFX11: s_cls_i32_i64 s105, s[2:3] ; encoding: [0x02,0x0d,0xe9,0xbe] +0x02,0x0d,0xe9,0xbe + +# GFX11: s_cls_i32_i64 vcc_hi, s[2:3] ; encoding: [0x02,0x0d,0xeb,0xbe] +0x02,0x0d,0xeb,0xbe + +# GFX11: s_cls_i32_i64 vcc_lo, s[2:3] ; encoding: [0x02,0x0d,0xea,0xbe] +0x02,0x0d,0xea,0xbe + +# GFX11: s_cls_i32 m0, s1 ; encoding: [0x01,0x0c,0xfd,0xbe] +0x01,0x0c,0xfd,0xbe + +# GFX11: s_cls_i32 s0, 0.5 ; encoding: [0xf0,0x0c,0x80,0xbe] +0xf0,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, 0 ; encoding: [0x80,0x0c,0x80,0xbe] +0x80,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, 0x3f717273 ; encoding: [0xff,0x0c,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0c,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_cls_i32 s0, 0xaf123456 ; encoding: [0xff,0x0c,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0c,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_cls_i32 s0, -1 ; encoding: [0xc1,0x0c,0x80,0xbe] +0xc1,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, -4.0 ; encoding: [0xf7,0x0c,0x80,0xbe] +0xf7,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, exec_hi ; encoding: [0x7f,0x0c,0x80,0xbe] +0x7f,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, exec_lo ; encoding: [0x7e,0x0c,0x80,0xbe] +0x7e,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, m0 ; encoding: [0x7d,0x0c,0x80,0xbe] +0x7d,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, s104 ; encoding: [0x68,0x0c,0x80,0xbe] +0x68,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, s1 ; encoding: [0x01,0x0c,0x80,0xbe] +0x01,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, vcc_hi ; encoding: [0x6b,0x0c,0x80,0xbe] +0x6b,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s0, vcc_lo ; encoding: [0x6a,0x0c,0x80,0xbe] +0x6a,0x0c,0x80,0xbe + +# GFX11: s_cls_i32 s105, s104 ; encoding: [0x68,0x0c,0xe9,0xbe] +0x68,0x0c,0xe9,0xbe + +# GFX11: s_cls_i32 s105, s1 ; encoding: [0x01,0x0c,0xe9,0xbe] +0x01,0x0c,0xe9,0xbe + +# GFX11: s_cls_i32 vcc_hi, s1 ; encoding: [0x01,0x0c,0xeb,0xbe] +0x01,0x0c,0xeb,0xbe + +# GFX11: s_cls_i32 vcc_lo, s1 ; encoding: [0x01,0x0c,0xea,0xbe] +0x01,0x0c,0xea,0xbe + +# GFX11: s_clz_i32_u32 exec_hi, s1 ; encoding: [0x01,0x0a,0xff,0xbe] +0x01,0x0a,0xff,0xbe + +# GFX11: s_clz_i32_u32 exec_lo, s1 ; encoding: [0x01,0x0a,0xfe,0xbe] +0x01,0x0a,0xfe,0xbe + +# GFX11: s_clz_i32_u32 m0, s1 ; encoding: [0x01,0x0a,0xfd,0xbe] +0x01,0x0a,0xfd,0xbe + +# GFX11: s_clz_i32_u32 s0, 0.5 ; encoding: [0xf0,0x0a,0x80,0xbe] +0xf0,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, 0 ; encoding: [0x80,0x0a,0x80,0xbe] +0x80,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, 0x3f717273 ; encoding: [0xff,0x0a,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0a,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_clz_i32_u32 s0, 0xaf123456 ; encoding: [0xff,0x0a,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0a,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_clz_i32_u32 s0, -1 ; encoding: [0xc1,0x0a,0x80,0xbe] +0xc1,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, -4.0 ; encoding: [0xf7,0x0a,0x80,0xbe] +0xf7,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, exec_hi ; encoding: [0x7f,0x0a,0x80,0xbe] +0x7f,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, exec_lo ; encoding: [0x7e,0x0a,0x80,0xbe] +0x7e,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, m0 ; encoding: [0x7d,0x0a,0x80,0xbe] +0x7d,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, s104 ; encoding: [0x68,0x0a,0x80,0xbe] +0x68,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, s1 ; encoding: [0x01,0x0a,0x80,0xbe] +0x01,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s0, vcc_hi ; encoding: [0x6b,0x0a,0x80,0xbe] +0x6b,0x0a,0x80,0xbe + +# 
GFX11: s_clz_i32_u32 s0, vcc_lo ; encoding: [0x6a,0x0a,0x80,0xbe] +0x6a,0x0a,0x80,0xbe + +# GFX11: s_clz_i32_u32 s105, s104 ; encoding: [0x68,0x0a,0xe9,0xbe] +0x68,0x0a,0xe9,0xbe + +# GFX11: s_clz_i32_u32 s105, s1 ; encoding: [0x01,0x0a,0xe9,0xbe] +0x01,0x0a,0xe9,0xbe + +# GFX11: s_clz_i32_u32 vcc_hi, s1 ; encoding: [0x01,0x0a,0xeb,0xbe] +0x01,0x0a,0xeb,0xbe + +# GFX11: s_clz_i32_u32 vcc_lo, s1 ; encoding: [0x01,0x0a,0xea,0xbe] +0x01,0x0a,0xea,0xbe + +# GFX11: s_clz_i32_u64 exec_hi, s[2:3] ; encoding: [0x02,0x0b,0xff,0xbe] +0x02,0x0b,0xff,0xbe + +# GFX11: s_clz_i32_u64 exec_lo, s[2:3] ; encoding: [0x02,0x0b,0xfe,0xbe] +0x02,0x0b,0xfe,0xbe + +# GFX11: s_clz_i32_u64 m0, s[2:3] ; encoding: [0x02,0x0b,0xfd,0xbe] +0x02,0x0b,0xfd,0xbe + +# GFX11: s_clz_i32_u64 s0, 0.5 ; encoding: [0xf0,0x0b,0x80,0xbe] +0xf0,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, 0 ; encoding: [0x80,0x0b,0x80,0xbe] +0x80,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, 0x3f717273 ; encoding: [0xff,0x0b,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0b,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_clz_i32_u64 s0, -1 ; encoding: [0xc1,0x0b,0x80,0xbe] +0xc1,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, -4.0 ; encoding: [0xf7,0x0b,0x80,0xbe] +0xf7,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, exec ; encoding: [0x7e,0x0b,0x80,0xbe] +0x7e,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, s[102:103] ; encoding: [0x66,0x0b,0x80,0xbe] +0x66,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, s[2:3] ; encoding: [0x02,0x0b,0x80,0xbe] +0x02,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s0, vcc ; encoding: [0x6a,0x0b,0x80,0xbe] +0x6a,0x0b,0x80,0xbe + +# GFX11: s_clz_i32_u64 s105, s[102:103] ; encoding: [0x66,0x0b,0xe9,0xbe] +0x66,0x0b,0xe9,0xbe + +# GFX11: s_clz_i32_u64 s105, s[2:3] ; encoding: [0x02,0x0b,0xe9,0xbe] +0x02,0x0b,0xe9,0xbe + +# GFX11: s_clz_i32_u64 vcc_hi, s[2:3] ; encoding: [0x02,0x0b,0xeb,0xbe] +0x02,0x0b,0xeb,0xbe + +# GFX11: s_clz_i32_u64 vcc_lo, s[2:3] ; encoding: [0x02,0x0b,0xea,0xbe] +0x02,0x0b,0xea,0xbe + +# GFX11: s_cmov_b32 exec_hi, s1 ; encoding: [0x01,0x02,0xff,0xbe] +0x01,0x02,0xff,0xbe + +# GFX11: s_cmov_b32 exec_lo, s1 ; encoding: [0x01,0x02,0xfe,0xbe] +0x01,0x02,0xfe,0xbe + +# GFX11: s_cmov_b32 m0, s1 ; encoding: [0x01,0x02,0xfd,0xbe] +0x01,0x02,0xfd,0xbe + +# GFX11: s_cmov_b32 s0, 0.5 ; encoding: [0xf0,0x02,0x80,0xbe] +0xf0,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, 0 ; encoding: [0x80,0x02,0x80,0xbe] +0x80,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, 0x3f717273 ; encoding: [0xff,0x02,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_cmov_b32 s0, 0xaf123456 ; encoding: [0xff,0x02,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_cmov_b32 s0, -1 ; encoding: [0xc1,0x02,0x80,0xbe] +0xc1,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, -4.0 ; encoding: [0xf7,0x02,0x80,0xbe] +0xf7,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, exec_hi ; encoding: [0x7f,0x02,0x80,0xbe] +0x7f,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, exec_lo ; encoding: [0x7e,0x02,0x80,0xbe] +0x7e,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, m0 ; encoding: [0x7d,0x02,0x80,0xbe] +0x7d,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, s104 ; encoding: [0x68,0x02,0x80,0xbe] +0x68,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, s1 ; encoding: [0x01,0x02,0x80,0xbe] +0x01,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, vcc_hi ; encoding: [0x6b,0x02,0x80,0xbe] +0x6b,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s0, vcc_lo ; 
encoding: [0x6a,0x02,0x80,0xbe] +0x6a,0x02,0x80,0xbe + +# GFX11: s_cmov_b32 s105, s104 ; encoding: [0x68,0x02,0xe9,0xbe] +0x68,0x02,0xe9,0xbe + +# GFX11: s_cmov_b32 s105, s1 ; encoding: [0x01,0x02,0xe9,0xbe] +0x01,0x02,0xe9,0xbe + +# GFX11: s_cmov_b32 vcc_hi, s1 ; encoding: [0x01,0x02,0xeb,0xbe] +0x01,0x02,0xeb,0xbe + +# GFX11: s_cmov_b32 vcc_lo, s1 ; encoding: [0x01,0x02,0xea,0xbe] +0x01,0x02,0xea,0xbe + +# GFX11: s_cmov_b64 exec, s[2:3] ; encoding: [0x02,0x03,0xfe,0xbe] +0x02,0x03,0xfe,0xbe + +# GFX11: s_cmov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x03,0x80,0xbe] +0xf0,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], 0 ; encoding: [0x80,0x03,0x80,0xbe] +0x80,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x03,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x03,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_cmov_b64 s[0:1], -1 ; encoding: [0xc1,0x03,0x80,0xbe] +0xc1,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], -4.0 ; encoding: [0xf7,0x03,0x80,0xbe] +0xf7,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], exec ; encoding: [0x7e,0x03,0x80,0xbe] +0x7e,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], s[102:103] ; encoding: [0x66,0x03,0x80,0xbe] +0x66,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], s[2:3] ; encoding: [0x02,0x03,0x80,0xbe] +0x02,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[0:1], vcc ; encoding: [0x6a,0x03,0x80,0xbe] +0x6a,0x03,0x80,0xbe + +# GFX11: s_cmov_b64 s[104:105], s[102:103] ; encoding: [0x66,0x03,0xe8,0xbe] +0x66,0x03,0xe8,0xbe + +# GFX11: s_cmov_b64 s[104:105], s[2:3] ; encoding: [0x02,0x03,0xe8,0xbe] +0x02,0x03,0xe8,0xbe + +# GFX11: s_cmov_b64 vcc, s[2:3] ; encoding: [0x02,0x03,0xea,0xbe] +0x02,0x03,0xea,0xbe + +# GFX11: s_cmovk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb1] +0x34,0x12,0x7f,0xb1 + +# GFX11: s_cmovk_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb1] +0x34,0x12,0x7e,0xb1 + +# GFX11: s_cmovk_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb1] +0x34,0x12,0x7d,0xb1 + +# GFX11: s_cmovk_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb1] +0x34,0x12,0x00,0xb1 + +# GFX11: s_cmovk_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb1] +0xd1,0xc1,0x00,0xb1 + +# GFX11: s_cmovk_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb1] +0x34,0x12,0x69,0xb1 + +# GFX11: s_cmovk_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb1] +0x34,0x12,0x6b,0xb1 + +# GFX11: s_cmovk_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb1] +0x34,0x12,0x6a,0xb1 + +# GFX11: s_cmp_eq_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x00,0xbf] +0x7f,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x00,0xbf] +0x7e,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 m0, s1 ; encoding: [0x7d,0x01,0x00,0xbf] +0x7d,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, 0.5 ; encoding: [0x00,0xf0,0x00,0xbf] +0x00,0xf0,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, 0 ; encoding: [0x00,0x80,0x00,0xbf] +0x00,0x80,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x00,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x00,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_eq_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x00,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x00,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_eq_i32 s0, -1 ; encoding: [0x00,0xc1,0x00,0xbf] +0x00,0xc1,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x00,0xbf] +0x00,0xf7,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x00,0xbf] +0x00,0x7f,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, exec_lo ; 
encoding: [0x00,0x7e,0x00,0xbf] +0x00,0x7e,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, m0 ; encoding: [0x00,0x7d,0x00,0xbf] +0x00,0x7d,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, s104 ; encoding: [0x00,0x68,0x00,0xbf] +0x00,0x68,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, s1 ; encoding: [0x00,0x01,0x00,0xbf] +0x00,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x00,0xbf] +0x00,0x6b,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x00,0xbf] +0x00,0x6a,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s105, s104 ; encoding: [0x69,0x68,0x00,0xbf] +0x69,0x68,0x00,0xbf + +# GFX11: s_cmp_eq_i32 s105, s1 ; encoding: [0x69,0x01,0x00,0xbf] +0x69,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x00,0xbf] +0x6b,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x00,0xbf] +0x6a,0x01,0x00,0xbf + +# GFX11: s_cmp_eq_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x06,0xbf] +0x7f,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x06,0xbf] +0x7e,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 m0, s1 ; encoding: [0x7d,0x01,0x06,0xbf] +0x7d,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x06,0xbf] +0x00,0xf0,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, 0 ; encoding: [0x00,0x80,0x06,0xbf] +0x00,0x80,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x06,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x06,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_eq_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x06,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x06,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_eq_u32 s0, -1 ; encoding: [0x00,0xc1,0x06,0xbf] +0x00,0xc1,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, -4.0 ; encoding: [0x00,0xf7,0x06,0xbf] +0x00,0xf7,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x06,0xbf] +0x00,0x7f,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x06,0xbf] +0x00,0x7e,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, m0 ; encoding: [0x00,0x7d,0x06,0xbf] +0x00,0x7d,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, s104 ; encoding: [0x00,0x68,0x06,0xbf] +0x00,0x68,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, s1 ; encoding: [0x00,0x01,0x06,0xbf] +0x00,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x06,0xbf] +0x00,0x6b,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x06,0xbf] +0x00,0x6a,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s105, s104 ; encoding: [0x69,0x68,0x06,0xbf] +0x69,0x68,0x06,0xbf + +# GFX11: s_cmp_eq_u32 s105, s1 ; encoding: [0x69,0x01,0x06,0xbf] +0x69,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x06,0xbf] +0x6b,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x06,0xbf] +0x6a,0x01,0x06,0xbf + +# GFX11: s_cmp_eq_u64 exec, s[2:3] ; encoding: [0x7e,0x02,0x10,0xbf] +0x7e,0x02,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], 0.5 ; encoding: [0x00,0xf0,0x10,0xbf] +0x00,0xf0,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], 0 ; encoding: [0x00,0x80,0x10,0xbf] +0x00,0x80,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], 0x3f717273 ; encoding: [0x00,0xff,0x10,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x10,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_eq_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x10,0xbf] +0x00,0xc1,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], -4.0 ; encoding: [0x00,0xf7,0x10,0xbf] +0x00,0xf7,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], exec ; encoding: [0x00,0x7e,0x10,0xbf] 
+0x00,0x7e,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], s[102:103] ; encoding: [0x00,0x66,0x10,0xbf] +0x00,0x66,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x10,0xbf] +0x00,0x02,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[0:1], vcc ; encoding: [0x00,0x6a,0x10,0xbf] +0x00,0x6a,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[104:105], s[102:103] ; encoding: [0x68,0x66,0x10,0xbf] +0x68,0x66,0x10,0xbf + +# GFX11: s_cmp_eq_u64 s[104:105], s[2:3] ; encoding: [0x68,0x02,0x10,0xbf] +0x68,0x02,0x10,0xbf + +# GFX11: s_cmp_eq_u64 vcc, s[2:3] ; encoding: [0x6a,0x02,0x10,0xbf] +0x6a,0x02,0x10,0xbf + +# GFX11: s_cmp_ge_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x03,0xbf] +0x7f,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x03,0xbf] +0x7e,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 m0, s1 ; encoding: [0x7d,0x01,0x03,0xbf] +0x7d,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, 0.5 ; encoding: [0x00,0xf0,0x03,0xbf] +0x00,0xf0,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, 0 ; encoding: [0x00,0x80,0x03,0xbf] +0x00,0x80,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x03,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x03,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_ge_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x03,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x03,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_ge_i32 s0, -1 ; encoding: [0x00,0xc1,0x03,0xbf] +0x00,0xc1,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x03,0xbf] +0x00,0xf7,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x03,0xbf] +0x00,0x7f,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, exec_lo ; encoding: [0x00,0x7e,0x03,0xbf] +0x00,0x7e,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, m0 ; encoding: [0x00,0x7d,0x03,0xbf] +0x00,0x7d,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, s104 ; encoding: [0x00,0x68,0x03,0xbf] +0x00,0x68,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, s1 ; encoding: [0x00,0x01,0x03,0xbf] +0x00,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x03,0xbf] +0x00,0x6b,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x03,0xbf] +0x00,0x6a,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s105, s104 ; encoding: [0x69,0x68,0x03,0xbf] +0x69,0x68,0x03,0xbf + +# GFX11: s_cmp_ge_i32 s105, s1 ; encoding: [0x69,0x01,0x03,0xbf] +0x69,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x03,0xbf] +0x6b,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x03,0xbf] +0x6a,0x01,0x03,0xbf + +# GFX11: s_cmp_ge_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x09,0xbf] +0x7f,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x09,0xbf] +0x7e,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 m0, s1 ; encoding: [0x7d,0x01,0x09,0xbf] +0x7d,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x09,0xbf] +0x00,0xf0,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, 0 ; encoding: [0x00,0x80,0x09,0xbf] +0x00,0x80,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x09,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x09,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_ge_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x09,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x09,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_ge_u32 s0, -1 ; encoding: [0x00,0xc1,0x09,0xbf] +0x00,0xc1,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, -4.0 ; encoding: [0x00,0xf7,0x09,0xbf] +0x00,0xf7,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x09,0xbf] +0x00,0x7f,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x09,0xbf] 
+0x00,0x7e,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, m0 ; encoding: [0x00,0x7d,0x09,0xbf] +0x00,0x7d,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, s104 ; encoding: [0x00,0x68,0x09,0xbf] +0x00,0x68,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, s1 ; encoding: [0x00,0x01,0x09,0xbf] +0x00,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x09,0xbf] +0x00,0x6b,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x09,0xbf] +0x00,0x6a,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s105, s104 ; encoding: [0x69,0x68,0x09,0xbf] +0x69,0x68,0x09,0xbf + +# GFX11: s_cmp_ge_u32 s105, s1 ; encoding: [0x69,0x01,0x09,0xbf] +0x69,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x09,0xbf] +0x6b,0x01,0x09,0xbf + +# GFX11: s_cmp_ge_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x09,0xbf] +0x6a,0x01,0x09,0xbf + +# GFX11: s_cmp_gt_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x02,0xbf] +0x7f,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x02,0xbf] +0x7e,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 m0, s1 ; encoding: [0x7d,0x01,0x02,0xbf] +0x7d,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, 0.5 ; encoding: [0x00,0xf0,0x02,0xbf] +0x00,0xf0,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, 0 ; encoding: [0x00,0x80,0x02,0xbf] +0x00,0x80,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x02,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x02,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_gt_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x02,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x02,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_gt_i32 s0, -1 ; encoding: [0x00,0xc1,0x02,0xbf] +0x00,0xc1,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x02,0xbf] +0x00,0xf7,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x02,0xbf] +0x00,0x7f,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, exec_lo ; encoding: [0x00,0x7e,0x02,0xbf] +0x00,0x7e,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, m0 ; encoding: [0x00,0x7d,0x02,0xbf] +0x00,0x7d,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, s104 ; encoding: [0x00,0x68,0x02,0xbf] +0x00,0x68,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, s1 ; encoding: [0x00,0x01,0x02,0xbf] +0x00,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x02,0xbf] +0x00,0x6b,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x02,0xbf] +0x00,0x6a,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s105, s104 ; encoding: [0x69,0x68,0x02,0xbf] +0x69,0x68,0x02,0xbf + +# GFX11: s_cmp_gt_i32 s105, s1 ; encoding: [0x69,0x01,0x02,0xbf] +0x69,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x02,0xbf] +0x6b,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x02,0xbf] +0x6a,0x01,0x02,0xbf + +# GFX11: s_cmp_gt_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x08,0xbf] +0x7f,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x08,0xbf] +0x7e,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 m0, s1 ; encoding: [0x7d,0x01,0x08,0xbf] +0x7d,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x08,0xbf] +0x00,0xf0,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, 0 ; encoding: [0x00,0x80,0x08,0xbf] +0x00,0x80,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x08,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x08,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_gt_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x08,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x08,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_gt_u32 s0, -1 ; encoding: [0x00,0xc1,0x08,0xbf] +0x00,0xc1,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, -4.0 ; 
encoding: [0x00,0xf7,0x08,0xbf] +0x00,0xf7,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x08,0xbf] +0x00,0x7f,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x08,0xbf] +0x00,0x7e,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, m0 ; encoding: [0x00,0x7d,0x08,0xbf] +0x00,0x7d,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, s104 ; encoding: [0x00,0x68,0x08,0xbf] +0x00,0x68,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, s1 ; encoding: [0x00,0x01,0x08,0xbf] +0x00,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x08,0xbf] +0x00,0x6b,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x08,0xbf] +0x00,0x6a,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s105, s104 ; encoding: [0x69,0x68,0x08,0xbf] +0x69,0x68,0x08,0xbf + +# GFX11: s_cmp_gt_u32 s105, s1 ; encoding: [0x69,0x01,0x08,0xbf] +0x69,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x08,0xbf] +0x6b,0x01,0x08,0xbf + +# GFX11: s_cmp_gt_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x08,0xbf] +0x6a,0x01,0x08,0xbf + +# GFX11: s_cmpk_eq_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb1] +0x34,0x12,0xff,0xb1 + +# GFX11: s_cmpk_eq_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb1] +0x34,0x12,0xfe,0xb1 + +# GFX11: s_cmpk_eq_i32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb1] +0x34,0x12,0xfd,0xb1 + +# GFX11: s_cmpk_eq_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb1] +0x34,0x12,0x80,0xb1 + +# GFX11: s_cmpk_eq_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb1] +0xd1,0xc1,0x80,0xb1 + +# GFX11: s_cmpk_eq_i32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb1] +0x34,0x12,0xe9,0xb1 + +# GFX11: s_cmpk_eq_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb1] +0x34,0x12,0xeb,0xb1 + +# GFX11: s_cmpk_eq_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb1] +0x34,0x12,0xea,0xb1 + +# GFX11: s_cmpk_eq_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb4] +0x34,0x12,0xff,0xb4 + +# GFX11: s_cmpk_eq_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb4] +0x34,0x12,0xfe,0xb4 + +# GFX11: s_cmpk_eq_u32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb4] +0x34,0x12,0xfd,0xb4 + +# GFX11: s_cmpk_eq_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb4] +0x34,0x12,0x80,0xb4 + +# GFX11: s_cmpk_eq_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb4] +0xd1,0xc1,0x80,0xb4 + +# GFX11: s_cmpk_eq_u32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb4] +0x34,0x12,0xe9,0xb4 + +# GFX11: s_cmpk_eq_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb4] +0x34,0x12,0xeb,0xb4 + +# GFX11: s_cmpk_eq_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb4] +0x34,0x12,0xea,0xb4 + +# GFX11: s_cmpk_ge_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb3] +0x34,0x12,0x7f,0xb3 + +# GFX11: s_cmpk_ge_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb3] +0x34,0x12,0x7e,0xb3 + +# GFX11: s_cmpk_ge_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb3] +0x34,0x12,0x7d,0xb3 + +# GFX11: s_cmpk_ge_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb3] +0x34,0x12,0x00,0xb3 + +# GFX11: s_cmpk_ge_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb3] +0xd1,0xc1,0x00,0xb3 + +# GFX11: s_cmpk_ge_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb3] +0x34,0x12,0x69,0xb3 + +# GFX11: s_cmpk_ge_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb3] +0x34,0x12,0x6b,0xb3 + +# GFX11: s_cmpk_ge_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb3] +0x34,0x12,0x6a,0xb3 + +# GFX11: s_cmpk_ge_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb6] +0x34,0x12,0x7f,0xb6 + +# GFX11: s_cmpk_ge_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb6] +0x34,0x12,0x7e,0xb6 + +# GFX11: s_cmpk_ge_u32 m0, 0x1234 ; encoding: 
[0x34,0x12,0x7d,0xb6] +0x34,0x12,0x7d,0xb6 + +# GFX11: s_cmpk_ge_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb6] +0x34,0x12,0x00,0xb6 + +# GFX11: s_cmpk_ge_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb6] +0xd1,0xc1,0x00,0xb6 + +# GFX11: s_cmpk_ge_u32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb6] +0x34,0x12,0x69,0xb6 + +# GFX11: s_cmpk_ge_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb6] +0x34,0x12,0x6b,0xb6 + +# GFX11: s_cmpk_ge_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb6] +0x34,0x12,0x6a,0xb6 + +# GFX11: s_cmpk_gt_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb2] +0x34,0x12,0xff,0xb2 + +# GFX11: s_cmpk_gt_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb2] +0x34,0x12,0xfe,0xb2 + +# GFX11: s_cmpk_gt_i32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb2] +0x34,0x12,0xfd,0xb2 + +# GFX11: s_cmpk_gt_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb2] +0x34,0x12,0x80,0xb2 + +# GFX11: s_cmpk_gt_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb2] +0xd1,0xc1,0x80,0xb2 + +# GFX11: s_cmpk_gt_i32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb2] +0x34,0x12,0xe9,0xb2 + +# GFX11: s_cmpk_gt_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb2] +0x34,0x12,0xeb,0xb2 + +# GFX11: s_cmpk_gt_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb2] +0x34,0x12,0xea,0xb2 + +# GFX11: s_cmpk_gt_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb5] +0x34,0x12,0xff,0xb5 + +# GFX11: s_cmpk_gt_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb5] +0x34,0x12,0xfe,0xb5 + +# GFX11: s_cmpk_gt_u32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb5] +0x34,0x12,0xfd,0xb5 + +# GFX11: s_cmpk_gt_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb5] +0x34,0x12,0x80,0xb5 + +# GFX11: s_cmpk_gt_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb5] +0xd1,0xc1,0x80,0xb5 + +# GFX11: s_cmpk_gt_u32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb5] +0x34,0x12,0xe9,0xb5 + +# GFX11: s_cmpk_gt_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb5] +0x34,0x12,0xeb,0xb5 + +# GFX11: s_cmpk_gt_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb5] +0x34,0x12,0xea,0xb5 + +# GFX11: s_cmpk_le_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb4] +0x34,0x12,0x7f,0xb4 + +# GFX11: s_cmpk_le_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb4] +0x34,0x12,0x7e,0xb4 + +# GFX11: s_cmpk_le_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb4] +0x34,0x12,0x7d,0xb4 + +# GFX11: s_cmpk_le_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb4] +0x34,0x12,0x00,0xb4 + +# GFX11: s_cmpk_le_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb4] +0xd1,0xc1,0x00,0xb4 + +# GFX11: s_cmpk_le_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb4] +0x34,0x12,0x69,0xb4 + +# GFX11: s_cmpk_le_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb4] +0x34,0x12,0x6b,0xb4 + +# GFX11: s_cmpk_le_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb4] +0x34,0x12,0x6a,0xb4 + +# GFX11: s_cmpk_le_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb7] +0x34,0x12,0x7f,0xb7 + +# GFX11: s_cmpk_le_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb7] +0x34,0x12,0x7e,0xb7 + +# GFX11: s_cmpk_le_u32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb7] +0x34,0x12,0x7d,0xb7 + +# GFX11: s_cmpk_le_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb7] +0x34,0x12,0x00,0xb7 + +# GFX11: s_cmpk_le_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb7] +0xd1,0xc1,0x00,0xb7 + +# GFX11: s_cmpk_le_u32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb7] +0x34,0x12,0x69,0xb7 + +# GFX11: s_cmpk_le_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb7] +0x34,0x12,0x6b,0xb7 + +# GFX11: s_cmpk_le_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb7] +0x34,0x12,0x6a,0xb7 + +# GFX11: 
s_cmpk_lg_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb2] +0x34,0x12,0x7f,0xb2 + +# GFX11: s_cmpk_lg_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb2] +0x34,0x12,0x7e,0xb2 + +# GFX11: s_cmpk_lg_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb2] +0x34,0x12,0x7d,0xb2 + +# GFX11: s_cmpk_lg_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb2] +0x34,0x12,0x00,0xb2 + +# GFX11: s_cmpk_lg_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb2] +0xd1,0xc1,0x00,0xb2 + +# GFX11: s_cmpk_lg_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb2] +0x34,0x12,0x69,0xb2 + +# GFX11: s_cmpk_lg_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb2] +0x34,0x12,0x6b,0xb2 + +# GFX11: s_cmpk_lg_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb2] +0x34,0x12,0x6a,0xb2 + +# GFX11: s_cmpk_lg_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb5] +0x34,0x12,0x7f,0xb5 + +# GFX11: s_cmpk_lg_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb5] +0x34,0x12,0x7e,0xb5 + +# GFX11: s_cmpk_lg_u32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb5] +0x34,0x12,0x7d,0xb5 + +# GFX11: s_cmpk_lg_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb5] +0x34,0x12,0x00,0xb5 + +# GFX11: s_cmpk_lg_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb5] +0xd1,0xc1,0x00,0xb5 + +# GFX11: s_cmpk_lg_u32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb5] +0x34,0x12,0x69,0xb5 + +# GFX11: s_cmpk_lg_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb5] +0x34,0x12,0x6b,0xb5 + +# GFX11: s_cmpk_lg_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb5] +0x34,0x12,0x6a,0xb5 + +# GFX11: s_cmpk_lt_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb3] +0x34,0x12,0xff,0xb3 + +# GFX11: s_cmpk_lt_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb3] +0x34,0x12,0xfe,0xb3 + +# GFX11: s_cmpk_lt_i32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb3] +0x34,0x12,0xfd,0xb3 + +# GFX11: s_cmpk_lt_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb3] +0x34,0x12,0x80,0xb3 + +# GFX11: s_cmpk_lt_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb3] +0xd1,0xc1,0x80,0xb3 + +# GFX11: s_cmpk_lt_i32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb3] +0x34,0x12,0xe9,0xb3 + +# GFX11: s_cmpk_lt_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb3] +0x34,0x12,0xeb,0xb3 + +# GFX11: s_cmpk_lt_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb3] +0x34,0x12,0xea,0xb3 + +# GFX11: s_cmpk_lt_u32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xb6] +0x34,0x12,0xff,0xb6 + +# GFX11: s_cmpk_lt_u32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xb6] +0x34,0x12,0xfe,0xb6 + +# GFX11: s_cmpk_lt_u32 m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xb6] +0x34,0x12,0xfd,0xb6 + +# GFX11: s_cmpk_lt_u32 s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xb6] +0x34,0x12,0x80,0xb6 + +# GFX11: s_cmpk_lt_u32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb6] +0xd1,0xc1,0x80,0xb6 + +# GFX11: s_cmpk_lt_u32 s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xb6] +0x34,0x12,0xe9,0xb6 + +# GFX11: s_cmpk_lt_u32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xb6] +0x34,0x12,0xeb,0xb6 + +# GFX11: s_cmpk_lt_u32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xb6] +0x34,0x12,0xea,0xb6 + +# GFX11: s_cmp_le_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x05,0xbf] +0x7f,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x05,0xbf] +0x7e,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 m0, s1 ; encoding: [0x7d,0x01,0x05,0xbf] +0x7d,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, 0.5 ; encoding: [0x00,0xf0,0x05,0xbf] +0x00,0xf0,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, 0 ; encoding: [0x00,0x80,0x05,0xbf] +0x00,0x80,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x05,0xbf,0x73,0x72,0x71,0x3f] 
+0x00,0xff,0x05,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_le_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x05,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x05,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_le_i32 s0, -1 ; encoding: [0x00,0xc1,0x05,0xbf] +0x00,0xc1,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x05,0xbf] +0x00,0xf7,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x05,0xbf] +0x00,0x7f,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, exec_lo ; encoding: [0x00,0x7e,0x05,0xbf] +0x00,0x7e,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, m0 ; encoding: [0x00,0x7d,0x05,0xbf] +0x00,0x7d,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, s104 ; encoding: [0x00,0x68,0x05,0xbf] +0x00,0x68,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, s1 ; encoding: [0x00,0x01,0x05,0xbf] +0x00,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x05,0xbf] +0x00,0x6b,0x05,0xbf + +# GFX11: s_cmp_le_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x05,0xbf] +0x00,0x6a,0x05,0xbf + +# GFX11: s_cmp_le_i32 s105, s104 ; encoding: [0x69,0x68,0x05,0xbf] +0x69,0x68,0x05,0xbf + +# GFX11: s_cmp_le_i32 s105, s1 ; encoding: [0x69,0x01,0x05,0xbf] +0x69,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x05,0xbf] +0x6b,0x01,0x05,0xbf + +# GFX11: s_cmp_le_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x05,0xbf] +0x6a,0x01,0x05,0xbf + +# GFX11: s_cmp_le_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x0b,0xbf] +0x7f,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x0b,0xbf] +0x7e,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 m0, s1 ; encoding: [0x7d,0x01,0x0b,0xbf] +0x7d,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x0b,0xbf] +0x00,0xf0,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, 0 ; encoding: [0x00,0x80,0x0b,0xbf] +0x00,0x80,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x0b,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0b,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_le_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x0b,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0b,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_le_u32 s0, -1 ; encoding: [0x00,0xc1,0x0b,0xbf] +0x00,0xc1,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, -4.0 ; encoding: [0x00,0xf7,0x0b,0xbf] +0x00,0xf7,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x0b,0xbf] +0x00,0x7f,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x0b,0xbf] +0x00,0x7e,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, m0 ; encoding: [0x00,0x7d,0x0b,0xbf] +0x00,0x7d,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, s104 ; encoding: [0x00,0x68,0x0b,0xbf] +0x00,0x68,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, s1 ; encoding: [0x00,0x01,0x0b,0xbf] +0x00,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x0b,0xbf] +0x00,0x6b,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x0b,0xbf] +0x00,0x6a,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s105, s104 ; encoding: [0x69,0x68,0x0b,0xbf] +0x69,0x68,0x0b,0xbf + +# GFX11: s_cmp_le_u32 s105, s1 ; encoding: [0x69,0x01,0x0b,0xbf] +0x69,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x0b,0xbf] +0x6b,0x01,0x0b,0xbf + +# GFX11: s_cmp_le_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x0b,0xbf] +0x6a,0x01,0x0b,0xbf + +# GFX11: s_cmp_lg_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x01,0xbf] +0x7f,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x01,0xbf] +0x7e,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 m0, s1 ; encoding: [0x7d,0x01,0x01,0xbf] +0x7d,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, 0.5 ; encoding: 
[0x00,0xf0,0x01,0xbf] +0x00,0xf0,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, 0 ; encoding: [0x00,0x80,0x01,0xbf] +0x00,0x80,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x01,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x01,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_lg_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x01,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x01,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_lg_i32 s0, -1 ; encoding: [0x00,0xc1,0x01,0xbf] +0x00,0xc1,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x01,0xbf] +0x00,0xf7,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x01,0xbf] +0x00,0x7f,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, exec_lo ; encoding: [0x00,0x7e,0x01,0xbf] +0x00,0x7e,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, m0 ; encoding: [0x00,0x7d,0x01,0xbf] +0x00,0x7d,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, s104 ; encoding: [0x00,0x68,0x01,0xbf] +0x00,0x68,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, s1 ; encoding: [0x00,0x01,0x01,0xbf] +0x00,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x01,0xbf] +0x00,0x6b,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x01,0xbf] +0x00,0x6a,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s105, s104 ; encoding: [0x69,0x68,0x01,0xbf] +0x69,0x68,0x01,0xbf + +# GFX11: s_cmp_lg_i32 s105, s1 ; encoding: [0x69,0x01,0x01,0xbf] +0x69,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x01,0xbf] +0x6b,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x01,0xbf] +0x6a,0x01,0x01,0xbf + +# GFX11: s_cmp_lg_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x07,0xbf] +0x7f,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x07,0xbf] +0x7e,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 m0, s1 ; encoding: [0x7d,0x01,0x07,0xbf] +0x7d,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x07,0xbf] +0x00,0xf0,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, 0 ; encoding: [0x00,0x80,0x07,0xbf] +0x00,0x80,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x07,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x07,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_lg_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x07,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x07,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_lg_u32 s0, -1 ; encoding: [0x00,0xc1,0x07,0xbf] +0x00,0xc1,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, -4.0 ; encoding: [0x00,0xf7,0x07,0xbf] +0x00,0xf7,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x07,0xbf] +0x00,0x7f,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x07,0xbf] +0x00,0x7e,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, m0 ; encoding: [0x00,0x7d,0x07,0xbf] +0x00,0x7d,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, s104 ; encoding: [0x00,0x68,0x07,0xbf] +0x00,0x68,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, s1 ; encoding: [0x00,0x01,0x07,0xbf] +0x00,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x07,0xbf] +0x00,0x6b,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x07,0xbf] +0x00,0x6a,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s105, s104 ; encoding: [0x69,0x68,0x07,0xbf] +0x69,0x68,0x07,0xbf + +# GFX11: s_cmp_lg_u32 s105, s1 ; encoding: [0x69,0x01,0x07,0xbf] +0x69,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x07,0xbf] +0x6b,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x07,0xbf] +0x6a,0x01,0x07,0xbf + +# GFX11: s_cmp_lg_u64 exec, s[2:3] ; encoding: [0x7e,0x02,0x11,0xbf] +0x7e,0x02,0x11,0xbf + +# GFX11: 
s_cmp_lg_u64 s[0:1], 0.5 ; encoding: [0x00,0xf0,0x11,0xbf] +0x00,0xf0,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], 0 ; encoding: [0x00,0x80,0x11,0xbf] +0x00,0x80,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], 0x3f717273 ; encoding: [0x00,0xff,0x11,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x11,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_lg_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x11,0xbf] +0x00,0xc1,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], -4.0 ; encoding: [0x00,0xf7,0x11,0xbf] +0x00,0xf7,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], exec ; encoding: [0x00,0x7e,0x11,0xbf] +0x00,0x7e,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], s[102:103] ; encoding: [0x00,0x66,0x11,0xbf] +0x00,0x66,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x11,0xbf] +0x00,0x02,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[0:1], vcc ; encoding: [0x00,0x6a,0x11,0xbf] +0x00,0x6a,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[104:105], s[102:103] ; encoding: [0x68,0x66,0x11,0xbf] +0x68,0x66,0x11,0xbf + +# GFX11: s_cmp_lg_u64 s[104:105], s[2:3] ; encoding: [0x68,0x02,0x11,0xbf] +0x68,0x02,0x11,0xbf + +# GFX11: s_cmp_lg_u64 vcc, s[2:3] ; encoding: [0x6a,0x02,0x11,0xbf] +0x6a,0x02,0x11,0xbf + +# GFX11: s_cmp_lt_i32 exec_hi, s1 ; encoding: [0x7f,0x01,0x04,0xbf] +0x7f,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 exec_lo, s1 ; encoding: [0x7e,0x01,0x04,0xbf] +0x7e,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 m0, s1 ; encoding: [0x7d,0x01,0x04,0xbf] +0x7d,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, 0.5 ; encoding: [0x00,0xf0,0x04,0xbf] +0x00,0xf0,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, 0 ; encoding: [0x00,0x80,0x04,0xbf] +0x00,0x80,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x04,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x04,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_lt_i32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x04,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x04,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_lt_i32 s0, -1 ; encoding: [0x00,0xc1,0x04,0xbf] +0x00,0xc1,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, -4.0 ; encoding: [0x00,0xf7,0x04,0xbf] +0x00,0xf7,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, exec_hi ; encoding: [0x00,0x7f,0x04,0xbf] +0x00,0x7f,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, exec_lo ; encoding: [0x00,0x7e,0x04,0xbf] +0x00,0x7e,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, m0 ; encoding: [0x00,0x7d,0x04,0xbf] +0x00,0x7d,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, s104 ; encoding: [0x00,0x68,0x04,0xbf] +0x00,0x68,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, s1 ; encoding: [0x00,0x01,0x04,0xbf] +0x00,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, vcc_hi ; encoding: [0x00,0x6b,0x04,0xbf] +0x00,0x6b,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s0, vcc_lo ; encoding: [0x00,0x6a,0x04,0xbf] +0x00,0x6a,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s105, s104 ; encoding: [0x69,0x68,0x04,0xbf] +0x69,0x68,0x04,0xbf + +# GFX11: s_cmp_lt_i32 s105, s1 ; encoding: [0x69,0x01,0x04,0xbf] +0x69,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x04,0xbf] +0x6b,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_i32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x04,0xbf] +0x6a,0x01,0x04,0xbf + +# GFX11: s_cmp_lt_u32 exec_hi, s1 ; encoding: [0x7f,0x01,0x0a,0xbf] +0x7f,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 exec_lo, s1 ; encoding: [0x7e,0x01,0x0a,0xbf] +0x7e,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 m0, s1 ; encoding: [0x7d,0x01,0x0a,0xbf] +0x7d,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, 0.5 ; encoding: [0x00,0xf0,0x0a,0xbf] +0x00,0xf0,0x0a,0xbf + +# 
GFX11: s_cmp_lt_u32 s0, 0 ; encoding: [0x00,0x80,0x0a,0xbf] +0x00,0x80,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, 0x3f717273 ; encoding: [0x00,0xff,0x0a,0xbf,0x73,0x72,0x71,0x3f] +0x00,0xff,0x0a,0xbf,0x73,0x72,0x71,0x3f + +# GFX11: s_cmp_lt_u32 s0, 0xaf123456 ; encoding: [0x00,0xff,0x0a,0xbf,0x56,0x34,0x12,0xaf] +0x00,0xff,0x0a,0xbf,0x56,0x34,0x12,0xaf + +# GFX11: s_cmp_lt_u32 s0, -1 ; encoding: [0x00,0xc1,0x0a,0xbf] +0x00,0xc1,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, -4.0 ; encoding: [0x00,0xf7,0x0a,0xbf] +0x00,0xf7,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, exec_hi ; encoding: [0x00,0x7f,0x0a,0xbf] +0x00,0x7f,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, exec_lo ; encoding: [0x00,0x7e,0x0a,0xbf] +0x00,0x7e,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, m0 ; encoding: [0x00,0x7d,0x0a,0xbf] +0x00,0x7d,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, s104 ; encoding: [0x00,0x68,0x0a,0xbf] +0x00,0x68,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, s1 ; encoding: [0x00,0x01,0x0a,0xbf] +0x00,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, vcc_hi ; encoding: [0x00,0x6b,0x0a,0xbf] +0x00,0x6b,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s0, vcc_lo ; encoding: [0x00,0x6a,0x0a,0xbf] +0x00,0x6a,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s105, s104 ; encoding: [0x69,0x68,0x0a,0xbf] +0x69,0x68,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 s105, s1 ; encoding: [0x69,0x01,0x0a,0xbf] +0x69,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 vcc_hi, s1 ; encoding: [0x6b,0x01,0x0a,0xbf] +0x6b,0x01,0x0a,0xbf + +# GFX11: s_cmp_lt_u32 vcc_lo, s1 ; encoding: [0x6a,0x01,0x0a,0xbf] +0x6a,0x01,0x0a,0xbf + +# GFX11: s_code_end ; encoding: [0x00,0x00,0x9f,0xbf] +0x00,0x00,0x9f,0xbf + +# GFX11: s_cselect_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x98] +0x01,0x02,0x7f,0x98 + +# GFX11: s_cselect_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x98] +0x01,0x02,0x7e,0x98 + +# GFX11: s_cselect_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x98] +0x01,0x02,0x7d,0x98 + +# GFX11: s_cselect_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x98] +0xf0,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x98] +0x80,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x98,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x98,0x73,0x72,0x71,0x3f + +# GFX11: s_cselect_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x98,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x98,0x56,0x34,0x12,0xaf + +# GFX11: s_cselect_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x98] +0xc1,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x98] +0xf7,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x98] +0x7f,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x98] +0x7e,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x98] +0x7d,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x98] +0x68,0x67,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x98] +0x68,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x98] +0x01,0xf0,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x98] +0x01,0x80,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x98,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x98,0x73,0x72,0x71,0x3f + +# GFX11: s_cselect_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x98,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x98,0x56,0x34,0x12,0xaf + +# GFX11: s_cselect_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x98] 
+0x01,0xc1,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x98] +0x01,0xf7,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x98] +0x01,0x7f,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x98] +0x01,0x7e,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x98] +0x01,0x7d,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x98] +0x01,0x67,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x98] +0x01,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x98] +0x01,0x6b,0x00,0x98 + +# GFX11: s_cselect_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x98] +0x01,0x6a,0x00,0x98 + +# GFX11: s_cselect_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x98] +0x6b,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x98] +0x6a,0x02,0x00,0x98 + +# GFX11: s_cselect_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x98] +0x68,0x67,0x69,0x98 + +# GFX11: s_cselect_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x98] +0x68,0x02,0x69,0x98 + +# GFX11: s_cselect_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x98] +0x01,0x67,0x69,0x98 + +# GFX11: s_cselect_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x98] +0x01,0x02,0x69,0x98 + +# GFX11: s_cselect_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x98] +0x01,0x02,0x6b,0x98 + +# GFX11: s_cselect_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x98] +0x01,0x02,0x6a,0x98 + +# GFX11: s_cselect_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x98] +0x02,0x04,0xfe,0x98 + +# GFX11: s_cselect_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x98] +0xf0,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x98] +0x80,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x98,0x73,0x72,0x71,0x3f + +# GFX11: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf + +# GFX11: s_cselect_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x98] +0xc1,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x98] +0xf7,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x98] +0x7e,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x98] +0x66,0x64,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x98] +0x66,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x98] +0x02,0xf0,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x98] +0x02,0x80,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x98,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x98,0x73,0x72,0x71,0x3f + +# GFX11: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf + +# GFX11: s_cselect_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x98] +0x02,0xc1,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x98] +0x02,0xf7,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x98] +0x02,0x7e,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], s[100:101] ; encoding: 
[0x02,0x64,0x80,0x98] +0x02,0x64,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x98] +0x02,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x98] +0x02,0x6a,0x80,0x98 + +# GFX11: s_cselect_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x98] +0x6a,0x04,0x80,0x98 + +# GFX11: s_cselect_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x98] +0x66,0x64,0xe8,0x98 + +# GFX11: s_cselect_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x98] +0x66,0x04,0xe8,0x98 + +# GFX11: s_cselect_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x98] +0x02,0x64,0xe8,0x98 + +# GFX11: s_cselect_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x98] +0x02,0x04,0xe8,0x98 + +# GFX11: s_cselect_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x98] +0x02,0x04,0xea,0x98 + +# GFX11: s_ctz_i32_b32 exec_hi, s1 ; encoding: [0x01,0x08,0xff,0xbe] +0x01,0x08,0xff,0xbe + +# GFX11: s_ctz_i32_b32 exec_lo, s1 ; encoding: [0x01,0x08,0xfe,0xbe] +0x01,0x08,0xfe,0xbe + +# GFX11: s_ctz_i32_b32 m0, s1 ; encoding: [0x01,0x08,0xfd,0xbe] +0x01,0x08,0xfd,0xbe + +# GFX11: s_ctz_i32_b32 s0, 0.5 ; encoding: [0xf0,0x08,0x80,0xbe] +0xf0,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, 0 ; encoding: [0x80,0x08,0x80,0xbe] +0x80,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, 0x3f717273 ; encoding: [0xff,0x08,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x08,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_ctz_i32_b32 s0, 0xaf123456 ; encoding: [0xff,0x08,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x08,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_ctz_i32_b32 s0, -1 ; encoding: [0xc1,0x08,0x80,0xbe] +0xc1,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, -4.0 ; encoding: [0xf7,0x08,0x80,0xbe] +0xf7,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, exec_hi ; encoding: [0x7f,0x08,0x80,0xbe] +0x7f,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, exec_lo ; encoding: [0x7e,0x08,0x80,0xbe] +0x7e,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, m0 ; encoding: [0x7d,0x08,0x80,0xbe] +0x7d,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, s104 ; encoding: [0x68,0x08,0x80,0xbe] +0x68,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, s1 ; encoding: [0x01,0x08,0x80,0xbe] +0x01,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, vcc_hi ; encoding: [0x6b,0x08,0x80,0xbe] +0x6b,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s0, vcc_lo ; encoding: [0x6a,0x08,0x80,0xbe] +0x6a,0x08,0x80,0xbe + +# GFX11: s_ctz_i32_b32 s105, s104 ; encoding: [0x68,0x08,0xe9,0xbe] +0x68,0x08,0xe9,0xbe + +# GFX11: s_ctz_i32_b32 s105, s1 ; encoding: [0x01,0x08,0xe9,0xbe] +0x01,0x08,0xe9,0xbe + +# GFX11: s_ctz_i32_b32 vcc_hi, s1 ; encoding: [0x01,0x08,0xeb,0xbe] +0x01,0x08,0xeb,0xbe + +# GFX11: s_ctz_i32_b32 vcc_lo, s1 ; encoding: [0x01,0x08,0xea,0xbe] +0x01,0x08,0xea,0xbe + +# GFX11: s_ctz_i32_b64 exec_hi, s[2:3] ; encoding: [0x02,0x09,0xff,0xbe] +0x02,0x09,0xff,0xbe + +# GFX11: s_ctz_i32_b64 exec_lo, s[2:3] ; encoding: [0x02,0x09,0xfe,0xbe] +0x02,0x09,0xfe,0xbe + +# GFX11: s_ctz_i32_b64 m0, s[2:3] ; encoding: [0x02,0x09,0xfd,0xbe] +0x02,0x09,0xfd,0xbe + +# GFX11: s_ctz_i32_b64 s0, 0.5 ; encoding: [0xf0,0x09,0x80,0xbe] +0xf0,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, 0 ; encoding: [0x80,0x09,0x80,0xbe] +0x80,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, 0x3f717273 ; encoding: [0xff,0x09,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x09,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: 
s_ctz_i32_b64 s0, -1 ; encoding: [0xc1,0x09,0x80,0xbe] +0xc1,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, -4.0 ; encoding: [0xf7,0x09,0x80,0xbe] +0xf7,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, exec ; encoding: [0x7e,0x09,0x80,0xbe] +0x7e,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, s[102:103] ; encoding: [0x66,0x09,0x80,0xbe] +0x66,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, s[2:3] ; encoding: [0x02,0x09,0x80,0xbe] +0x02,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s0, vcc ; encoding: [0x6a,0x09,0x80,0xbe] +0x6a,0x09,0x80,0xbe + +# GFX11: s_ctz_i32_b64 s105, s[102:103] ; encoding: [0x66,0x09,0xe9,0xbe] +0x66,0x09,0xe9,0xbe + +# GFX11: s_ctz_i32_b64 s105, s[2:3] ; encoding: [0x02,0x09,0xe9,0xbe] +0x02,0x09,0xe9,0xbe + +# GFX11: s_ctz_i32_b64 vcc_hi, s[2:3] ; encoding: [0x02,0x09,0xeb,0xbe] +0x02,0x09,0xeb,0xbe + +# GFX11: s_ctz_i32_b64 vcc_lo, s[2:3] ; encoding: [0x02,0x09,0xea,0xbe] +0x02,0x09,0xea,0xbe + +# GFX11: s_decperflevel 0 ; encoding: [0x00,0x00,0xb9,0xbf] +0x00,0x00,0xb9,0xbf + +# GFX11: s_decperflevel 0x1234 ; encoding: [0x34,0x12,0xb9,0xbf] +0x34,0x12,0xb9,0xbf + +# GFX11: s_decperflevel 0xc1d1 ; encoding: [0xd1,0xc1,0xb9,0xbf] +0xd1,0xc1,0xb9,0xbf + +# GFX11: s_delay_alu 0 ; encoding: [0x00,0x00,0x87,0xbf] +0x00,0x00,0x87,0xbf + +# GFX11: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; encoding: [0x91,0x00,0x87,0xbf] +0x91,0x00,0x87,0xbf + +# GFX11: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf] +0x01,0x00,0x87,0xbf + +# GFX11: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; encoding: [0x81,0x04,0x87,0xbf] +0x81,0x04,0x87,0xbf + +# GFX11: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) ; encoding: [0x91,0x01,0x87,0xbf] +0x91,0x01,0x87,0xbf + +# GFX11: s_delay_alu instid0(/* invalid instid value */) | instskip(/* invalid instskip value */) | instid1(/* invalid instid value */) ; encoding: [0xff,0x07,0x87,0xbf] +0xff,0x07,0x87,0xbf + +# GFX11: s_denorm_mode 0 ; encoding: [0x00,0x00,0x92,0xbf] +0x00,0x00,0x92,0xbf + +# GFX11: s_denorm_mode 0x1234 ; encoding: [0x34,0x12,0x92,0xbf] +0x34,0x12,0x92,0xbf + +# GFX11: s_denorm_mode 0xc1d1 ; encoding: [0xd1,0xc1,0x92,0xbf] +0xd1,0xc1,0x92,0xbf + +# GFX11: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +0x00,0x00,0xb0,0xbf + +# GFX11: s_endpgm_saved ; encoding: [0x00,0x00,0xb1,0xbf] +0x00,0x00,0xb1,0xbf + +# GFX11: s_getpc_b64 exec ; encoding: [0x00,0x47,0xfe,0xbe] +0x00,0x47,0xfe,0xbe + +# GFX11: s_getpc_b64 s[0:1] ; encoding: [0x00,0x47,0x80,0xbe] +0x00,0x47,0x80,0xbe + +# GFX11: s_getpc_b64 s[104:105] ; encoding: [0x00,0x47,0xe8,0xbe] +0x00,0x47,0xe8,0xbe + +# GFX11: s_getpc_b64 vcc ; encoding: [0x00,0x47,0xea,0xbe] +0x00,0x47,0xea,0xbe + +# GFX11: s_getreg_b32 exec_hi, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xff,0xb8] +0x34,0x12,0xff,0xb8 + +# GFX11: s_getreg_b32 exec_lo, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xfe,0xb8] +0x34,0x12,0xfe,0xb8 + +# GFX11: s_getreg_b32 m0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xfd,0xb8] +0x34,0x12,0xfd,0xb8 + +# GFX11: s_getreg_b32 s0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0x80,0xb8] +0x34,0x12,0x80,0xb8 + +# GFX11: s_getreg_b32 s0, hwreg(17, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8] +0xd1,0xc1,0x80,0xb8 + +# GFX11: s_getreg_b32 s105, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xe9,0xb8] +0x34,0x12,0xe9,0xb8 + +# GFX11: s_getreg_b32 vcc_hi, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xeb,0xb8] +0x34,0x12,0xeb,0xb8 + +# GFX11: s_getreg_b32 vcc_lo, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xea,0xb8] +0x34,0x12,0xea,0xb8 + +# GFX11: 
s_icache_inv ; encoding: [0x00,0x00,0xbc,0xbf] +0x00,0x00,0xbc,0xbf + +# GFX11: s_incperflevel 0 ; encoding: [0x00,0x00,0xb8,0xbf] +0x00,0x00,0xb8,0xbf + +# GFX11: s_incperflevel 0x1234 ; encoding: [0x34,0x12,0xb8,0xbf] +0x34,0x12,0xb8,0xbf + +# GFX11: s_incperflevel 0xc1d1 ; encoding: [0xd1,0xc1,0xb8,0xbf] +0xd1,0xc1,0xb8,0xbf + +# GFX11: s_lshl1_add_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x87] +0x01,0x02,0x7f,0x87 + +# GFX11: s_lshl1_add_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x87] +0x01,0x02,0x7e,0x87 + +# GFX11: s_lshl1_add_u32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x87] +0x01,0x02,0x7d,0x87 + +# GFX11: s_lshl1_add_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x87] +0xf0,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x87] +0x80,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x87,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x87,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl1_add_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x87,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x87,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl1_add_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x87] +0xc1,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x87] +0xf7,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x87] +0x7f,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x87] +0x7e,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x87] +0x7d,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x87] +0x68,0x67,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x87] +0x68,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x87] +0x01,0xf0,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x87] +0x01,0x80,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x87,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x87,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl1_add_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x87,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x87,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl1_add_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x87] +0x01,0xc1,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x87] +0x01,0xf7,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x87] +0x01,0x7f,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x87] +0x01,0x7e,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x87] +0x01,0x7d,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x87] +0x01,0x67,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x87] +0x01,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x87] +0x01,0x6b,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x87] +0x01,0x6a,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x87] +0x6b,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x87] +0x6a,0x02,0x00,0x87 + +# GFX11: s_lshl1_add_u32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x87] +0x68,0x67,0x69,0x87 + +# GFX11: s_lshl1_add_u32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x87] +0x68,0x02,0x69,0x87 + +# GFX11: s_lshl1_add_u32 s105, s1, s103 ; 
encoding: [0x01,0x67,0x69,0x87] +0x01,0x67,0x69,0x87 + +# GFX11: s_lshl1_add_u32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x87] +0x01,0x02,0x69,0x87 + +# GFX11: s_lshl1_add_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x87] +0x01,0x02,0x6b,0x87 + +# GFX11: s_lshl1_add_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x87] +0x01,0x02,0x6a,0x87 + +# GFX11: s_lshl2_add_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x87] +0x01,0x02,0xff,0x87 + +# GFX11: s_lshl2_add_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x87] +0x01,0x02,0xfe,0x87 + +# GFX11: s_lshl2_add_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x87] +0x01,0x02,0xfd,0x87 + +# GFX11: s_lshl2_add_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x87] +0xf0,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x87] +0x80,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x87,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x87,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl2_add_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x87,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x87,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl2_add_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x87] +0xc1,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x87] +0xf7,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x87] +0x7f,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x87] +0x7e,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x87] +0x7d,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x87] +0x68,0x67,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x87] +0x68,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x87] +0x01,0xf0,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x87] +0x01,0x80,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x87,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x87,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl2_add_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x87,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x87,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl2_add_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x87] +0x01,0xc1,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x87] +0x01,0xf7,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x87] +0x01,0x7f,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x87] +0x01,0x7e,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x87] +0x01,0x7d,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x87] +0x01,0x67,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x87] +0x01,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x87] +0x01,0x6b,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x87] +0x01,0x6a,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x87] +0x6b,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x87] +0x6a,0x02,0x80,0x87 + +# GFX11: s_lshl2_add_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x87] +0x68,0x67,0xe9,0x87 + +# GFX11: s_lshl2_add_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x87] +0x68,0x02,0xe9,0x87 + +# GFX11: s_lshl2_add_u32 s105, 
s1, s103 ; encoding: [0x01,0x67,0xe9,0x87] +0x01,0x67,0xe9,0x87 + +# GFX11: s_lshl2_add_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x87] +0x01,0x02,0xe9,0x87 + +# GFX11: s_lshl2_add_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x87] +0x01,0x02,0xeb,0x87 + +# GFX11: s_lshl2_add_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x87] +0x01,0x02,0xea,0x87 + +# GFX11: s_lshl3_add_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x88] +0x01,0x02,0x7f,0x88 + +# GFX11: s_lshl3_add_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x88] +0x01,0x02,0x7e,0x88 + +# GFX11: s_lshl3_add_u32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x88] +0x01,0x02,0x7d,0x88 + +# GFX11: s_lshl3_add_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x88] +0xf0,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x88] +0x80,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x88,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x88,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl3_add_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x88,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x88,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl3_add_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x88] +0xc1,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x88] +0xf7,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x88] +0x7f,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x88] +0x7e,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x88] +0x7d,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x88] +0x68,0x67,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x88] +0x68,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x88] +0x01,0xf0,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x88] +0x01,0x80,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x88,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x88,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl3_add_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x88,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x88,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl3_add_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x88] +0x01,0xc1,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x88] +0x01,0xf7,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x88] +0x01,0x7f,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x88] +0x01,0x7e,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x88] +0x01,0x7d,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x88] +0x01,0x67,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x88] +0x01,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x88] +0x01,0x6b,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x88] +0x01,0x6a,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x88] +0x6b,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x88] +0x6a,0x02,0x00,0x88 + +# GFX11: s_lshl3_add_u32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x88] +0x68,0x67,0x69,0x88 + +# GFX11: s_lshl3_add_u32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x88] +0x68,0x02,0x69,0x88 + +# GFX11: 
s_lshl3_add_u32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x88] +0x01,0x67,0x69,0x88 + +# GFX11: s_lshl3_add_u32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x88] +0x01,0x02,0x69,0x88 + +# GFX11: s_lshl3_add_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x88] +0x01,0x02,0x6b,0x88 + +# GFX11: s_lshl3_add_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x88] +0x01,0x02,0x6a,0x88 + +# GFX11: s_lshl4_add_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x88] +0x01,0x02,0xff,0x88 + +# GFX11: s_lshl4_add_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x88] +0x01,0x02,0xfe,0x88 + +# GFX11: s_lshl4_add_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x88] +0x01,0x02,0xfd,0x88 + +# GFX11: s_lshl4_add_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x88] +0xf0,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x88] +0x80,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x88,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x88,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl4_add_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x88,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x88,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl4_add_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x88] +0xc1,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x88] +0xf7,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x88] +0x7f,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x88] +0x7e,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x88] +0x7d,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x88] +0x68,0x67,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x88] +0x68,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x88] +0x01,0xf0,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x88] +0x01,0x80,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x88,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x88,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl4_add_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x88,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x88,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl4_add_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x88] +0x01,0xc1,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x88] +0x01,0xf7,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x88] +0x01,0x7f,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x88] +0x01,0x7e,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x88] +0x01,0x7d,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x88] +0x01,0x67,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x88] +0x01,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x88] +0x01,0x6b,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x88] +0x01,0x6a,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x88] +0x6b,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x88] +0x6a,0x02,0x80,0x88 + +# GFX11: s_lshl4_add_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x88] +0x68,0x67,0xe9,0x88 + +# GFX11: s_lshl4_add_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x88] +0x68,0x02,0xe9,0x88 + 
+# GFX11: s_lshl4_add_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x88] +0x01,0x67,0xe9,0x88 + +# GFX11: s_lshl4_add_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x88] +0x01,0x02,0xe9,0x88 + +# GFX11: s_lshl4_add_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x88] +0x01,0x02,0xeb,0x88 + +# GFX11: s_lshl4_add_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x88] +0x01,0x02,0xea,0x88 + +# GFX11: s_lshl_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x84] +0x01,0x02,0x7f,0x84 + +# GFX11: s_lshl_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x84] +0x01,0x02,0x7e,0x84 + +# GFX11: s_lshl_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x84] +0x01,0x02,0x7d,0x84 + +# GFX11: s_lshl_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x84] +0xf0,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x84] +0x80,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x84,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x84,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x84,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x84,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x84] +0xc1,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x84] +0xf7,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x84] +0x7f,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x84] +0x7e,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x84] +0x7d,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x84] +0x68,0x67,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x84] +0x68,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x84] +0x01,0xf0,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x84] +0x01,0x80,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x84,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x84,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x84,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x84,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x84] +0x01,0xc1,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x84] +0x01,0xf7,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x84] +0x01,0x7f,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x84] +0x01,0x7e,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x84] +0x01,0x7d,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x84] +0x01,0x67,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x84] +0x01,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x84] +0x01,0x6b,0x00,0x84 + +# GFX11: s_lshl_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x84] +0x01,0x6a,0x00,0x84 + +# GFX11: s_lshl_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x84] +0x6b,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x84] +0x6a,0x02,0x00,0x84 + +# GFX11: s_lshl_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x84] +0x68,0x67,0x69,0x84 + +# GFX11: s_lshl_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x84] +0x68,0x02,0x69,0x84 + +# GFX11: s_lshl_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x84] +0x01,0x67,0x69,0x84 + +# GFX11: s_lshl_b32 s105, s1, s2 ; encoding: 
[0x01,0x02,0x69,0x84] +0x01,0x02,0x69,0x84 + +# GFX11: s_lshl_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x84] +0x01,0x02,0x6b,0x84 + +# GFX11: s_lshl_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x84] +0x01,0x02,0x6a,0x84 + +# GFX11: s_lshl_b64 exec, s[2:3], s4 ; encoding: [0x02,0x04,0xfe,0x84] +0x02,0x04,0xfe,0x84 + +# GFX11: s_lshl_b64 s[0:1], 0.5, s4 ; encoding: [0xf0,0x04,0x80,0x84] +0xf0,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], 0, s4 ; encoding: [0x80,0x04,0x80,0x84] +0x80,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], 0x3f717273, s4 ; encoding: [0xff,0x04,0x80,0x84,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x84,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x84] +0xc1,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], -4.0, s4 ; encoding: [0xf7,0x04,0x80,0x84] +0xf7,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], exec, s4 ; encoding: [0x7e,0x04,0x80,0x84] +0x7e,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[102:103], s100 ; encoding: [0x66,0x64,0x80,0x84] +0x66,0x64,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[102:103], s4 ; encoding: [0x66,0x04,0x80,0x84] +0x66,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x84] +0x02,0xf0,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x84] +0x02,0x80,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x84,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x84,0x73,0x72,0x71,0x3f + +# GFX11: s_lshl_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x84,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x84,0x56,0x34,0x12,0xaf + +# GFX11: s_lshl_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x84] +0x02,0xc1,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x84] +0x02,0xf7,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x84] +0x02,0x7e,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], s100 ; encoding: [0x02,0x64,0x80,0x84] +0x02,0x64,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], s4 ; encoding: [0x02,0x04,0x80,0x84] +0x02,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], s[2:3], vcc_lo ; encoding: [0x02,0x6a,0x80,0x84] +0x02,0x6a,0x80,0x84 + +# GFX11: s_lshl_b64 s[0:1], vcc, s4 ; encoding: [0x6a,0x04,0x80,0x84] +0x6a,0x04,0x80,0x84 + +# GFX11: s_lshl_b64 s[104:105], s[102:103], s100 ; encoding: [0x66,0x64,0xe8,0x84] +0x66,0x64,0xe8,0x84 + +# GFX11: s_lshl_b64 s[104:105], s[102:103], s4 ; encoding: [0x66,0x04,0xe8,0x84] +0x66,0x04,0xe8,0x84 + +# GFX11: s_lshl_b64 s[104:105], s[2:3], s100 ; encoding: [0x02,0x64,0xe8,0x84] +0x02,0x64,0xe8,0x84 + +# GFX11: s_lshl_b64 s[104:105], s[2:3], s4 ; encoding: [0x02,0x04,0xe8,0x84] +0x02,0x04,0xe8,0x84 + +# GFX11: s_lshl_b64 vcc, s[2:3], s4 ; encoding: [0x02,0x04,0xea,0x84] +0x02,0x04,0xea,0x84 + +# GFX11: s_lshr_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x85] +0x01,0x02,0x7f,0x85 + +# GFX11: s_lshr_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x85] +0x01,0x02,0x7e,0x85 + +# GFX11: s_lshr_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x85] +0x01,0x02,0x7d,0x85 + +# GFX11: s_lshr_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x85] +0xf0,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x85] +0x80,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x85,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x85,0x73,0x72,0x71,0x3f 
+ +# GFX11: s_lshr_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x85,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x85,0x56,0x34,0x12,0xaf + +# GFX11: s_lshr_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x85] +0xc1,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x85] +0xf7,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x85] +0x7f,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x85] +0x7e,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x85] +0x7d,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x85] +0x68,0x67,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x85] +0x68,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x85] +0x01,0xf0,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x85] +0x01,0x80,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x85,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x85,0x73,0x72,0x71,0x3f + +# GFX11: s_lshr_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x85,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x85,0x56,0x34,0x12,0xaf + +# GFX11: s_lshr_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x85] +0x01,0xc1,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x85] +0x01,0xf7,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x85] +0x01,0x7f,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x85] +0x01,0x7e,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x85] +0x01,0x7d,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x85] +0x01,0x67,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x85] +0x01,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x85] +0x01,0x6b,0x00,0x85 + +# GFX11: s_lshr_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x85] +0x01,0x6a,0x00,0x85 + +# GFX11: s_lshr_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x85] +0x6b,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x85] +0x6a,0x02,0x00,0x85 + +# GFX11: s_lshr_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x85] +0x68,0x67,0x69,0x85 + +# GFX11: s_lshr_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x85] +0x68,0x02,0x69,0x85 + +# GFX11: s_lshr_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x85] +0x01,0x67,0x69,0x85 + +# GFX11: s_lshr_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x85] +0x01,0x02,0x69,0x85 + +# GFX11: s_lshr_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x85] +0x01,0x02,0x6b,0x85 + +# GFX11: s_lshr_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x85] +0x01,0x02,0x6a,0x85 + +# GFX11: s_lshr_b64 exec, s[2:3], s4 ; encoding: [0x02,0x04,0xfe,0x85] +0x02,0x04,0xfe,0x85 + +# GFX11: s_lshr_b64 s[0:1], 0.5, s4 ; encoding: [0xf0,0x04,0x80,0x85] +0xf0,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], 0, s4 ; encoding: [0x80,0x04,0x80,0x85] +0x80,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], 0x3f717273, s4 ; encoding: [0xff,0x04,0x80,0x85,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x85,0x73,0x72,0x71,0x3f + +# GFX11: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf + +# GFX11: s_lshr_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x85] +0xc1,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], -4.0, s4 ; encoding: [0xf7,0x04,0x80,0x85] +0xf7,0x04,0x80,0x85 + +# 
GFX11: s_lshr_b64 s[0:1], exec, s4 ; encoding: [0x7e,0x04,0x80,0x85] +0x7e,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[102:103], s100 ; encoding: [0x66,0x64,0x80,0x85] +0x66,0x64,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[102:103], s4 ; encoding: [0x66,0x04,0x80,0x85] +0x66,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x85] +0x02,0xf0,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x85] +0x02,0x80,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x85,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x85,0x73,0x72,0x71,0x3f + +# GFX11: s_lshr_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x85,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x85,0x56,0x34,0x12,0xaf + +# GFX11: s_lshr_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x85] +0x02,0xc1,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x85] +0x02,0xf7,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x85] +0x02,0x7e,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], s100 ; encoding: [0x02,0x64,0x80,0x85] +0x02,0x64,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], s4 ; encoding: [0x02,0x04,0x80,0x85] +0x02,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], s[2:3], vcc_lo ; encoding: [0x02,0x6a,0x80,0x85] +0x02,0x6a,0x80,0x85 + +# GFX11: s_lshr_b64 s[0:1], vcc, s4 ; encoding: [0x6a,0x04,0x80,0x85] +0x6a,0x04,0x80,0x85 + +# GFX11: s_lshr_b64 s[104:105], s[102:103], s100 ; encoding: [0x66,0x64,0xe8,0x85] +0x66,0x64,0xe8,0x85 + +# GFX11: s_lshr_b64 s[104:105], s[102:103], s4 ; encoding: [0x66,0x04,0xe8,0x85] +0x66,0x04,0xe8,0x85 + +# GFX11: s_lshr_b64 s[104:105], s[2:3], s100 ; encoding: [0x02,0x64,0xe8,0x85] +0x02,0x64,0xe8,0x85 + +# GFX11: s_lshr_b64 s[104:105], s[2:3], s4 ; encoding: [0x02,0x04,0xe8,0x85] +0x02,0x04,0xe8,0x85 + +# GFX11: s_lshr_b64 vcc, s[2:3], s4 ; encoding: [0x02,0x04,0xea,0x85] +0x02,0x04,0xea,0x85 + +# GFX11: s_max_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8a] +0x01,0x02,0x7f,0x8a + +# GFX11: s_max_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8a] +0x01,0x02,0x7e,0x8a + +# GFX11: s_max_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8a] +0x01,0x02,0x7d,0x8a + +# GFX11: s_max_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8a] +0xf0,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8a] +0x80,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8a,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8a,0x73,0x72,0x71,0x3f + +# GFX11: s_max_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8a,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8a,0x56,0x34,0x12,0xaf + +# GFX11: s_max_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8a] +0xc1,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8a] +0xf7,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8a] +0x7f,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8a] +0x7e,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8a] +0x7d,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8a] +0x68,0x67,0x00,0x8a + +# GFX11: s_max_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8a] +0x68,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8a] +0x01,0xf0,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8a] +0x01,0x80,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, 0x3f717273 ; 
encoding: [0x01,0xff,0x00,0x8a,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8a,0x73,0x72,0x71,0x3f + +# GFX11: s_max_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8a,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8a,0x56,0x34,0x12,0xaf + +# GFX11: s_max_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8a] +0x01,0xc1,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8a] +0x01,0xf7,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8a] +0x01,0x7f,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8a] +0x01,0x7e,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8a] +0x01,0x7d,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8a] +0x01,0x67,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8a] +0x01,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8a] +0x01,0x6b,0x00,0x8a + +# GFX11: s_max_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8a] +0x01,0x6a,0x00,0x8a + +# GFX11: s_max_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8a] +0x6b,0x02,0x00,0x8a + +# GFX11: s_max_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8a] +0x6a,0x02,0x00,0x8a + +# GFX11: s_max_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8a] +0x68,0x67,0x69,0x8a + +# GFX11: s_max_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x8a] +0x68,0x02,0x69,0x8a + +# GFX11: s_max_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8a] +0x01,0x67,0x69,0x8a + +# GFX11: s_max_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8a] +0x01,0x02,0x69,0x8a + +# GFX11: s_max_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8a] +0x01,0x02,0x6b,0x8a + +# GFX11: s_max_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8a] +0x01,0x02,0x6a,0x8a + +# GFX11: s_max_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x8a] +0x01,0x02,0xff,0x8a + +# GFX11: s_max_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x8a] +0x01,0x02,0xfe,0x8a + +# GFX11: s_max_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x8a] +0x01,0x02,0xfd,0x8a + +# GFX11: s_max_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x8a] +0xf0,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x8a] +0x80,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x8a,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x8a,0x73,0x72,0x71,0x3f + +# GFX11: s_max_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x8a,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x8a,0x56,0x34,0x12,0xaf + +# GFX11: s_max_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x8a] +0xc1,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x8a] +0xf7,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x8a] +0x7f,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x8a] +0x7e,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x8a] +0x7d,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x8a] +0x68,0x67,0x80,0x8a + +# GFX11: s_max_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x8a] +0x68,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x8a] +0x01,0xf0,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x8a] +0x01,0x80,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x8a,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x8a,0x73,0x72,0x71,0x3f + +# GFX11: s_max_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x8a,0x56,0x34,0x12,0xaf] 
+0x01,0xff,0x80,0x8a,0x56,0x34,0x12,0xaf + +# GFX11: s_max_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x8a] +0x01,0xc1,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x8a] +0x01,0xf7,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x8a] +0x01,0x7f,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x8a] +0x01,0x7e,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x8a] +0x01,0x7d,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x8a] +0x01,0x67,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x8a] +0x01,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x8a] +0x01,0x6b,0x80,0x8a + +# GFX11: s_max_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x8a] +0x01,0x6a,0x80,0x8a + +# GFX11: s_max_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x8a] +0x6b,0x02,0x80,0x8a + +# GFX11: s_max_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x8a] +0x6a,0x02,0x80,0x8a + +# GFX11: s_max_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x8a] +0x68,0x67,0xe9,0x8a + +# GFX11: s_max_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x8a] +0x68,0x02,0xe9,0x8a + +# GFX11: s_max_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x8a] +0x01,0x67,0xe9,0x8a + +# GFX11: s_max_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x8a] +0x01,0x02,0xe9,0x8a + +# GFX11: s_max_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x8a] +0x01,0x02,0xeb,0x8a + +# GFX11: s_max_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x8a] +0x01,0x02,0xea,0x8a + +# GFX11: s_min_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x89] +0x01,0x02,0x7f,0x89 + +# GFX11: s_min_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x89] +0x01,0x02,0x7e,0x89 + +# GFX11: s_min_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x89] +0x01,0x02,0x7d,0x89 + +# GFX11: s_min_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x89] +0xf0,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x89] +0x80,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x89,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x89,0x73,0x72,0x71,0x3f + +# GFX11: s_min_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x89,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x89,0x56,0x34,0x12,0xaf + +# GFX11: s_min_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x89] +0xc1,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x89] +0xf7,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x89] +0x7f,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x89] +0x7e,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x89] +0x7d,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x89] +0x68,0x67,0x00,0x89 + +# GFX11: s_min_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x89] +0x68,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x89] +0x01,0xf0,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x89] +0x01,0x80,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x89,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x89,0x73,0x72,0x71,0x3f + +# GFX11: s_min_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x89,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x89,0x56,0x34,0x12,0xaf + +# GFX11: s_min_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x89] +0x01,0xc1,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x89] 
+0x01,0xf7,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x89] +0x01,0x7f,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x89] +0x01,0x7e,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x89] +0x01,0x7d,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x89] +0x01,0x67,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x89] +0x01,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x89] +0x01,0x6b,0x00,0x89 + +# GFX11: s_min_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x89] +0x01,0x6a,0x00,0x89 + +# GFX11: s_min_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x89] +0x6b,0x02,0x00,0x89 + +# GFX11: s_min_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x89] +0x6a,0x02,0x00,0x89 + +# GFX11: s_min_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x89] +0x68,0x67,0x69,0x89 + +# GFX11: s_min_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x89] +0x68,0x02,0x69,0x89 + +# GFX11: s_min_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x89] +0x01,0x67,0x69,0x89 + +# GFX11: s_min_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x89] +0x01,0x02,0x69,0x89 + +# GFX11: s_min_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x89] +0x01,0x02,0x6b,0x89 + +# GFX11: s_min_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x89] +0x01,0x02,0x6a,0x89 + +# GFX11: s_min_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x89] +0x01,0x02,0xff,0x89 + +# GFX11: s_min_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x89] +0x01,0x02,0xfe,0x89 + +# GFX11: s_min_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x89] +0x01,0x02,0xfd,0x89 + +# GFX11: s_min_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x89] +0xf0,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x89] +0x80,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x89,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x89,0x73,0x72,0x71,0x3f + +# GFX11: s_min_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x89,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x89,0x56,0x34,0x12,0xaf + +# GFX11: s_min_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x89] +0xc1,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x89] +0xf7,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x89] +0x7f,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x89] +0x7e,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x89] +0x7d,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x89] +0x68,0x67,0x80,0x89 + +# GFX11: s_min_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x89] +0x68,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x89] +0x01,0xf0,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x89] +0x01,0x80,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x89,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x89,0x73,0x72,0x71,0x3f + +# GFX11: s_min_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x89,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x89,0x56,0x34,0x12,0xaf + +# GFX11: s_min_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x89] +0x01,0xc1,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x89] +0x01,0xf7,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x89] +0x01,0x7f,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x89] 
+0x01,0x7e,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x89] +0x01,0x7d,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x89] +0x01,0x67,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x89] +0x01,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x89] +0x01,0x6b,0x80,0x89 + +# GFX11: s_min_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x89] +0x01,0x6a,0x80,0x89 + +# GFX11: s_min_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x89] +0x6b,0x02,0x80,0x89 + +# GFX11: s_min_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x89] +0x6a,0x02,0x80,0x89 + +# GFX11: s_min_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x89] +0x68,0x67,0xe9,0x89 + +# GFX11: s_min_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x89] +0x68,0x02,0xe9,0x89 + +# GFX11: s_min_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x89] +0x01,0x67,0xe9,0x89 + +# GFX11: s_min_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x89] +0x01,0x02,0xe9,0x89 + +# GFX11: s_min_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x89] +0x01,0x02,0xeb,0x89 + +# GFX11: s_min_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x89] +0x01,0x02,0xea,0x89 + +# GFX11: s_mov_b32 exec_hi, s1 ; encoding: [0x01,0x00,0xff,0xbe] +0x01,0x00,0xff,0xbe + +# GFX11: s_mov_b32 exec_lo, s1 ; encoding: [0x01,0x00,0xfe,0xbe] +0x01,0x00,0xfe,0xbe + +# GFX11: s_mov_b32 m0, s1 ; encoding: [0x01,0x00,0xfd,0xbe] +0x01,0x00,0xfd,0xbe + +# GFX11: s_mov_b32 s0, 0.5 ; encoding: [0xf0,0x00,0x80,0xbe] +0xf0,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, 0 ; encoding: [0x80,0x00,0x80,0xbe] +0x80,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, 0x3f717273 ; encoding: [0xff,0x00,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x00,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_mov_b32 s0, 0xaf123456 ; encoding: [0xff,0x00,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x00,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_mov_b32 s0, -1 ; encoding: [0xc1,0x00,0x80,0xbe] +0xc1,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, -4.0 ; encoding: [0xf7,0x00,0x80,0xbe] +0xf7,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, exec_hi ; encoding: [0x7f,0x00,0x80,0xbe] +0x7f,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, exec_lo ; encoding: [0x7e,0x00,0x80,0xbe] +0x7e,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, m0 ; encoding: [0x7d,0x00,0x80,0xbe] +0x7d,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, s104 ; encoding: [0x68,0x00,0x80,0xbe] +0x68,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, s1 ; encoding: [0x01,0x00,0x80,0xbe] +0x01,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, vcc_hi ; encoding: [0x6b,0x00,0x80,0xbe] +0x6b,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s0, vcc_lo ; encoding: [0x6a,0x00,0x80,0xbe] +0x6a,0x00,0x80,0xbe + +# GFX11: s_mov_b32 s105, s104 ; encoding: [0x68,0x00,0xe9,0xbe] +0x68,0x00,0xe9,0xbe + +# GFX11: s_mov_b32 s105, s1 ; encoding: [0x01,0x00,0xe9,0xbe] +0x01,0x00,0xe9,0xbe + +# GFX11: s_mov_b32 vcc_hi, s1 ; encoding: [0x01,0x00,0xeb,0xbe] +0x01,0x00,0xeb,0xbe + +# GFX11: s_mov_b32 vcc_lo, s1 ; encoding: [0x01,0x00,0xea,0xbe] +0x01,0x00,0xea,0xbe + +# GFX11: s_mov_b32 s0, null ; encoding: [0x7c,0x00,0x80,0xbe] +0x7c,0x00,0x80,0xbe + +# GFX11: s_mov_b32 null, s1 ; encoding: [0x01,0x00,0xfc,0xbe] +0x01,0x00,0xfc,0xbe + +# GFX11: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x01,0xfe,0xbe] +0x02,0x01,0xfe,0xbe + +# GFX11: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe] +0xf0,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] +0x80,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], 0x3f717273 ; encoding: 
[0xff,0x01,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x01,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe] +0xc1,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], -4.0 ; encoding: [0xf7,0x01,0x80,0xbe] +0xf7,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], exec ; encoding: [0x7e,0x01,0x80,0xbe] +0x7e,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], s[102:103] ; encoding: [0x66,0x01,0x80,0xbe] +0x66,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], s[2:3] ; encoding: [0x02,0x01,0x80,0xbe] +0x02,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[0:1], vcc ; encoding: [0x6a,0x01,0x80,0xbe] +0x6a,0x01,0x80,0xbe + +# GFX11: s_mov_b64 s[104:105], s[102:103] ; encoding: [0x66,0x01,0xe8,0xbe] +0x66,0x01,0xe8,0xbe + +# GFX11: s_mov_b64 s[104:105], s[2:3] ; encoding: [0x02,0x01,0xe8,0xbe] +0x02,0x01,0xe8,0xbe + +# GFX11: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x01,0xea,0xbe] +0x02,0x01,0xea,0xbe + +# GFX11: s_mov_b64 s[0:1], null ; encoding: [0x7c,0x01,0x80,0xbe] +0x7c,0x01,0x80,0xbe + +# GFX11: s_mov_b64 null, s[2:3] ; encoding: [0x02,0x01,0xfc,0xbe] +0x02,0x01,0xfc,0xbe + +# GFX11: s_movk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb0] +0x34,0x12,0x7f,0xb0 + +# GFX11: s_movk_i32 exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xb0] +0x34,0x12,0x7e,0xb0 + +# GFX11: s_movk_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb0] +0x34,0x12,0x7d,0xb0 + +# GFX11: s_movk_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb0] +0x34,0x12,0x00,0xb0 + +# GFX11: s_movk_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb0] +0xd1,0xc1,0x00,0xb0 + +# GFX11: s_movk_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb0] +0x34,0x12,0x69,0xb0 + +# GFX11: s_movk_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb0] +0x34,0x12,0x6b,0xb0 + +# GFX11: s_movk_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb0] +0x34,0x12,0x6a,0xb0 + +# GFX11: s_movreld_b32 s0, 0.5 ; encoding: [0xf0,0x42,0x80,0xbe] +0xf0,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, 0 ; encoding: [0x80,0x42,0x80,0xbe] +0x80,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, 0x3f717273 ; encoding: [0xff,0x42,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x42,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_movreld_b32 s0, 0xaf123456 ; encoding: [0xff,0x42,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x42,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_movreld_b32 s0, -1 ; encoding: [0xc1,0x42,0x80,0xbe] +0xc1,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, -4.0 ; encoding: [0xf7,0x42,0x80,0xbe] +0xf7,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, exec_hi ; encoding: [0x7f,0x42,0x80,0xbe] +0x7f,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, exec_lo ; encoding: [0x7e,0x42,0x80,0xbe] +0x7e,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, m0 ; encoding: [0x7d,0x42,0x80,0xbe] +0x7d,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, s104 ; encoding: [0x68,0x42,0x80,0xbe] +0x68,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, s1 ; encoding: [0x01,0x42,0x80,0xbe] +0x01,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, vcc_hi ; encoding: [0x6b,0x42,0x80,0xbe] +0x6b,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s0, vcc_lo ; encoding: [0x6a,0x42,0x80,0xbe] +0x6a,0x42,0x80,0xbe + +# GFX11: s_movreld_b32 s105, s104 ; encoding: [0x68,0x42,0xe9,0xbe] +0x68,0x42,0xe9,0xbe + +# GFX11: s_movreld_b32 s105, s1 ; encoding: [0x01,0x42,0xe9,0xbe] +0x01,0x42,0xe9,0xbe + +# GFX11: s_movreld_b32 vcc_hi, s1 ; encoding: [0x01,0x42,0xeb,0xbe] +0x01,0x42,0xeb,0xbe + +# GFX11: s_movreld_b32 vcc_lo, s1 ; encoding: 
[0x01,0x42,0xea,0xbe] +0x01,0x42,0xea,0xbe + +# GFX11: s_movreld_b64 s[0:1], 0.5 ; encoding: [0xf0,0x43,0x80,0xbe] +0xf0,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], 0 ; encoding: [0x80,0x43,0x80,0xbe] +0x80,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x43,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x43,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_movreld_b64 s[0:1], -1 ; encoding: [0xc1,0x43,0x80,0xbe] +0xc1,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], -4.0 ; encoding: [0xf7,0x43,0x80,0xbe] +0xf7,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], exec ; encoding: [0x7e,0x43,0x80,0xbe] +0x7e,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], s[102:103] ; encoding: [0x66,0x43,0x80,0xbe] +0x66,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], s[2:3] ; encoding: [0x02,0x43,0x80,0xbe] +0x02,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[0:1], vcc ; encoding: [0x6a,0x43,0x80,0xbe] +0x6a,0x43,0x80,0xbe + +# GFX11: s_movreld_b64 s[104:105], s[102:103] ; encoding: [0x66,0x43,0xe8,0xbe] +0x66,0x43,0xe8,0xbe + +# GFX11: s_movreld_b64 s[104:105], s[2:3] ; encoding: [0x02,0x43,0xe8,0xbe] +0x02,0x43,0xe8,0xbe + +# GFX11: s_movreld_b64 vcc, s[2:3] ; encoding: [0x02,0x43,0xea,0xbe] +0x02,0x43,0xea,0xbe + +# GFX11: s_movrels_b32 exec_hi, s1 ; encoding: [0x01,0x40,0xff,0xbe] +0x01,0x40,0xff,0xbe + +# GFX11: s_movrels_b32 exec_lo, s1 ; encoding: [0x01,0x40,0xfe,0xbe] +0x01,0x40,0xfe,0xbe + +# GFX11: s_movrels_b32 m0, s1 ; encoding: [0x01,0x40,0xfd,0xbe] +0x01,0x40,0xfd,0xbe + +# GFX11: s_movrels_b32 s0, s104 ; encoding: [0x68,0x40,0x80,0xbe] +0x68,0x40,0x80,0xbe + +# GFX11: s_movrels_b32 s0, s1 ; encoding: [0x01,0x40,0x80,0xbe] +0x01,0x40,0x80,0xbe + +# GFX11: s_movrels_b32 s0, vcc_hi ; encoding: [0x6b,0x40,0x80,0xbe] +0x6b,0x40,0x80,0xbe + +# GFX11: s_movrels_b32 s0, vcc_lo ; encoding: [0x6a,0x40,0x80,0xbe] +0x6a,0x40,0x80,0xbe + +# GFX11: s_movrels_b32 s105, s104 ; encoding: [0x68,0x40,0xe9,0xbe] +0x68,0x40,0xe9,0xbe + +# GFX11: s_movrels_b32 s105, s1 ; encoding: [0x01,0x40,0xe9,0xbe] +0x01,0x40,0xe9,0xbe + +# GFX11: s_movrels_b32 vcc_hi, s1 ; encoding: [0x01,0x40,0xeb,0xbe] +0x01,0x40,0xeb,0xbe + +# GFX11: s_movrels_b32 vcc_lo, s1 ; encoding: [0x01,0x40,0xea,0xbe] +0x01,0x40,0xea,0xbe + +# GFX11: s_movrels_b64 exec, s[2:3] ; encoding: [0x02,0x41,0xfe,0xbe] +0x02,0x41,0xfe,0xbe + +# GFX11: s_movrels_b64 s[0:1], s[102:103] ; encoding: [0x66,0x41,0x80,0xbe] +0x66,0x41,0x80,0xbe + +# GFX11: s_movrels_b64 s[0:1], s[2:3] ; encoding: [0x02,0x41,0x80,0xbe] +0x02,0x41,0x80,0xbe + +# GFX11: s_movrels_b64 s[0:1], vcc ; encoding: [0x6a,0x41,0x80,0xbe] +0x6a,0x41,0x80,0xbe + +# GFX11: s_movrels_b64 s[104:105], s[102:103] ; encoding: [0x66,0x41,0xe8,0xbe] +0x66,0x41,0xe8,0xbe + +# GFX11: s_movrels_b64 s[104:105], s[2:3] ; encoding: [0x02,0x41,0xe8,0xbe] +0x02,0x41,0xe8,0xbe + +# GFX11: s_movrels_b64 vcc, s[2:3] ; encoding: [0x02,0x41,0xea,0xbe] +0x02,0x41,0xea,0xbe + +# GFX11: s_movrelsd_2_b32 s0, s104 ; encoding: [0x68,0x44,0x80,0xbe] +0x68,0x44,0x80,0xbe + +# GFX11: s_movrelsd_2_b32 s0, s1 ; encoding: [0x01,0x44,0x80,0xbe] +0x01,0x44,0x80,0xbe + +# GFX11: s_movrelsd_2_b32 s0, vcc_hi ; encoding: [0x6b,0x44,0x80,0xbe] +0x6b,0x44,0x80,0xbe + +# GFX11: s_movrelsd_2_b32 s0, vcc_lo ; encoding: [0x6a,0x44,0x80,0xbe] +0x6a,0x44,0x80,0xbe + +# GFX11: s_movrelsd_2_b32 s105, s104 ; encoding: [0x68,0x44,0xe9,0xbe] +0x68,0x44,0xe9,0xbe + +# GFX11: s_movrelsd_2_b32 
s105, s1 ; encoding: [0x01,0x44,0xe9,0xbe] +0x01,0x44,0xe9,0xbe + +# GFX11: s_movrelsd_2_b32 vcc_hi, s1 ; encoding: [0x01,0x44,0xeb,0xbe] +0x01,0x44,0xeb,0xbe + +# GFX11: s_movrelsd_2_b32 vcc_lo, s1 ; encoding: [0x01,0x44,0xea,0xbe] +0x01,0x44,0xea,0xbe + +# GFX11: s_mul_hi_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x97] +0x01,0x02,0x7f,0x97 + +# GFX11: s_mul_hi_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x97] +0x01,0x02,0x7e,0x97 + +# GFX11: s_mul_hi_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x97] +0x01,0x02,0x7d,0x97 + +# GFX11: s_mul_hi_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x97] +0xf0,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x97] +0x80,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x97,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x97,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_hi_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x97,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x97,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_hi_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x97] +0xc1,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x97] +0xf7,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x97] +0x7f,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x97] +0x7e,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x97] +0x7d,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x97] +0x68,0x67,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x97] +0x68,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x97] +0x01,0xf0,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x97] +0x01,0x80,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x97,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x97,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_hi_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x97,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x97,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_hi_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x97] +0x01,0xc1,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x97] +0x01,0xf7,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x97] +0x01,0x7f,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x97] +0x01,0x7e,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x97] +0x01,0x7d,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x97] +0x01,0x67,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x97] +0x01,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x97] +0x01,0x6b,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x97] +0x01,0x6a,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x97] +0x6b,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x97] +0x6a,0x02,0x00,0x97 + +# GFX11: s_mul_hi_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x97] +0x68,0x67,0x69,0x97 + +# GFX11: s_mul_hi_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x97] +0x68,0x02,0x69,0x97 + +# GFX11: s_mul_hi_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x97] +0x01,0x67,0x69,0x97 + +# GFX11: s_mul_hi_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x97] +0x01,0x02,0x69,0x97 + +# GFX11: s_mul_hi_i32 
vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x97] +0x01,0x02,0x6b,0x97 + +# GFX11: s_mul_hi_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x97] +0x01,0x02,0x6a,0x97 + +# GFX11: s_mul_hi_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x96] +0x01,0x02,0xff,0x96 + +# GFX11: s_mul_hi_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x96] +0x01,0x02,0xfe,0x96 + +# GFX11: s_mul_hi_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x96] +0x01,0x02,0xfd,0x96 + +# GFX11: s_mul_hi_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x96] +0xf0,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x96] +0x80,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x96,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x96,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_hi_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x96,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x96,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_hi_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x96] +0xc1,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x96] +0xf7,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x96] +0x7f,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x96] +0x7e,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x96] +0x7d,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x96] +0x68,0x67,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x96] +0x68,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x96] +0x01,0xf0,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x96] +0x01,0x80,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x96,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x96,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_hi_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x96,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x96,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_hi_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x96] +0x01,0xc1,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x96] +0x01,0xf7,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x96] +0x01,0x7f,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x96] +0x01,0x7e,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x96] +0x01,0x7d,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x96] +0x01,0x67,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x96] +0x01,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x96] +0x01,0x6b,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x96] +0x01,0x6a,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x96] +0x6b,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x96] +0x6a,0x02,0x80,0x96 + +# GFX11: s_mul_hi_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x96] +0x68,0x67,0xe9,0x96 + +# GFX11: s_mul_hi_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x96] +0x68,0x02,0xe9,0x96 + +# GFX11: s_mul_hi_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x96] +0x01,0x67,0xe9,0x96 + +# GFX11: s_mul_hi_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x96] +0x01,0x02,0xe9,0x96 + +# GFX11: s_mul_hi_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x96] +0x01,0x02,0xeb,0x96 + +# GFX11: s_mul_hi_u32 
vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x96] +0x01,0x02,0xea,0x96 + +# GFX11: s_mul_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x96] +0x01,0x02,0x7f,0x96 + +# GFX11: s_mul_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x96] +0x01,0x02,0x7e,0x96 + +# GFX11: s_mul_i32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x96] +0x01,0x02,0x7d,0x96 + +# GFX11: s_mul_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x96] +0xf0,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x96] +0x80,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x96,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x96,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x96,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x96,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x96] +0xc1,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x96] +0xf7,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x96] +0x7f,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x96] +0x7e,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x96] +0x7d,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x96] +0x68,0x67,0x00,0x96 + +# GFX11: s_mul_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x96] +0x68,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x96] +0x01,0xf0,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x96] +0x01,0x80,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x96,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x96,0x73,0x72,0x71,0x3f + +# GFX11: s_mul_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x96,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x96,0x56,0x34,0x12,0xaf + +# GFX11: s_mul_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x96] +0x01,0xc1,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x96] +0x01,0xf7,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x96] +0x01,0x7f,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x96] +0x01,0x7e,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x96] +0x01,0x7d,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x96] +0x01,0x67,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x96] +0x01,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x96] +0x01,0x6b,0x00,0x96 + +# GFX11: s_mul_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x96] +0x01,0x6a,0x00,0x96 + +# GFX11: s_mul_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x96] +0x6b,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x96] +0x6a,0x02,0x00,0x96 + +# GFX11: s_mul_i32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x96] +0x68,0x67,0x69,0x96 + +# GFX11: s_mul_i32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x96] +0x68,0x02,0x69,0x96 + +# GFX11: s_mul_i32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x96] +0x01,0x67,0x69,0x96 + +# GFX11: s_mul_i32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x96] +0x01,0x02,0x69,0x96 + +# GFX11: s_mul_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x96] +0x01,0x02,0x6b,0x96 + +# GFX11: s_mul_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x96] +0x01,0x02,0x6a,0x96 + +# GFX11: s_mulk_i32 exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xb8] +0x34,0x12,0x7f,0xb8 + +# GFX11: s_mulk_i32 exec_lo, 
0x1234 ; encoding: [0x34,0x12,0x7e,0xb8] +0x34,0x12,0x7e,0xb8 + +# GFX11: s_mulk_i32 m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xb8] +0x34,0x12,0x7d,0xb8 + +# GFX11: s_mulk_i32 s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xb8] +0x34,0x12,0x00,0xb8 + +# GFX11: s_mulk_i32 s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xb8] +0xd1,0xc1,0x00,0xb8 + +# GFX11: s_mulk_i32 s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xb8] +0x34,0x12,0x69,0xb8 + +# GFX11: s_mulk_i32 vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xb8] +0x34,0x12,0x6b,0xb8 + +# GFX11: s_mulk_i32 vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xb8] +0x34,0x12,0x6a,0xb8 + +# GFX11: s_nand_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8e] +0x01,0x02,0x7f,0x8e + +# GFX11: s_nand_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8e] +0x01,0x02,0x7e,0x8e + +# GFX11: s_nand_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8e] +0x01,0x02,0x7d,0x8e + +# GFX11: s_nand_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8e] +0xf0,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8e] +0x80,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8e,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8e,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8e,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8e,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8e] +0xc1,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8e] +0xf7,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8e] +0x7f,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8e] +0x7e,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8e] +0x7d,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8e] +0x68,0x67,0x00,0x8e + +# GFX11: s_nand_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8e] +0x68,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8e] +0x01,0xf0,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8e] +0x01,0x80,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x8e,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8e,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8e,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8e,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8e] +0x01,0xc1,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8e] +0x01,0xf7,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8e] +0x01,0x7f,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8e] +0x01,0x7e,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8e] +0x01,0x7d,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8e] +0x01,0x67,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8e] +0x01,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8e] +0x01,0x6b,0x00,0x8e + +# GFX11: s_nand_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8e] +0x01,0x6a,0x00,0x8e + +# GFX11: s_nand_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8e] +0x6b,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8e] +0x6a,0x02,0x00,0x8e + +# GFX11: s_nand_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8e] +0x68,0x67,0x69,0x8e + +# GFX11: s_nand_b32 
s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x8e] +0x68,0x02,0x69,0x8e + +# GFX11: s_nand_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8e] +0x01,0x67,0x69,0x8e + +# GFX11: s_nand_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8e] +0x01,0x02,0x69,0x8e + +# GFX11: s_nand_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8e] +0x01,0x02,0x6b,0x8e + +# GFX11: s_nand_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8e] +0x01,0x02,0x6a,0x8e + +# GFX11: s_nand_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x8e] +0x02,0x04,0xfe,0x8e + +# GFX11: s_nand_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x8e] +0xf0,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x8e] +0x80,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x8e,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8e] +0xc1,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x8e] +0xf7,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x8e] +0x7e,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x8e] +0x66,0x64,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x8e] +0x66,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x8e] +0x02,0xf0,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x8e] +0x02,0x80,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x8e,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x8e,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8e] +0x02,0xc1,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x8e] +0x02,0xf7,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8e] +0x02,0x7e,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x8e] +0x02,0x64,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x8e] +0x02,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x8e] +0x02,0x6a,0x80,0x8e + +# GFX11: s_nand_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x8e] +0x6a,0x04,0x80,0x8e + +# GFX11: s_nand_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x8e] +0x66,0x64,0xe8,0x8e + +# GFX11: s_nand_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x8e] +0x66,0x04,0xe8,0x8e + +# GFX11: s_nand_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x8e] +0x02,0x64,0xe8,0x8e + +# GFX11: s_nand_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x8e] +0x02,0x04,0xe8,0x8e + +# GFX11: s_nand_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x8e] +0x02,0x04,0xea,0x8e + +# GFX11: s_nand_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x26,0x80,0xbe] +0xf0,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, 0 ; encoding: [0x80,0x26,0x80,0xbe] +0x80,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x26,0x80,0xbe,0x73,0x72,0x71,0x3f] 
+0xff,0x26,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x26,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x26,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_saveexec_b32 s0, -1 ; encoding: [0xc1,0x26,0x80,0xbe] +0xc1,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x26,0x80,0xbe] +0xf7,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x26,0x80,0xbe] +0x7f,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x26,0x80,0xbe] +0x7e,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, m0 ; encoding: [0x7d,0x26,0x80,0xbe] +0x7d,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, s104 ; encoding: [0x68,0x26,0x80,0xbe] +0x68,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, s1 ; encoding: [0x01,0x26,0x80,0xbe] +0x01,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x26,0x80,0xbe] +0x6b,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x26,0x80,0xbe] +0x6a,0x26,0x80,0xbe + +# GFX11: s_nand_saveexec_b32 s105, s104 ; encoding: [0x68,0x26,0xe9,0xbe] +0x68,0x26,0xe9,0xbe + +# GFX11: s_nand_saveexec_b32 s105, s1 ; encoding: [0x01,0x26,0xe9,0xbe] +0x01,0x26,0xe9,0xbe + +# GFX11: s_nand_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x26,0xeb,0xbe] +0x01,0x26,0xeb,0xbe + +# GFX11: s_nand_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x26,0xea,0xbe] +0x01,0x26,0xea,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x27,0x80,0xbe] +0xf0,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x27,0x80,0xbe] +0x80,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x27,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x27,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_nand_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x27,0x80,0xbe] +0xc1,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x27,0x80,0xbe] +0xf7,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x27,0x80,0xbe] +0x7e,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x27,0x80,0xbe] +0x66,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x27,0x80,0xbe] +0x02,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x27,0x80,0xbe] +0x6a,0x27,0x80,0xbe + +# GFX11: s_nand_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x27,0xe8,0xbe] +0x66,0x27,0xe8,0xbe + +# GFX11: s_nand_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x27,0xe8,0xbe] +0x02,0x27,0xe8,0xbe + +# GFX11: s_nand_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x27,0xea,0xbe] +0x02,0x27,0xea,0xbe + +# GFX11: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf] +0x00,0x00,0x80,0xbf + +# GFX11: s_nop 0x1234 ; encoding: [0x34,0x12,0x80,0xbf] +0x34,0x12,0x80,0xbf + +# GFX11: s_nop 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xbf] +0xd1,0xc1,0x80,0xbf + +# GFX11: s_nor_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8f] +0x01,0x02,0x7f,0x8f + +# GFX11: s_nor_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8f] +0x01,0x02,0x7e,0x8f + +# GFX11: s_nor_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8f] +0x01,0x02,0x7d,0x8f + +# GFX11: s_nor_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8f] +0xf0,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8f] +0x80,0x02,0x00,0x8f + +# GFX11: 
s_nor_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8f,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8f,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8f,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8f,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8f] +0xc1,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8f] +0xf7,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8f] +0x7f,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8f] +0x7e,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8f] +0x7d,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8f] +0x68,0x67,0x00,0x8f + +# GFX11: s_nor_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8f] +0x68,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8f] +0x01,0xf0,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8f] +0x01,0x80,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x8f,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8f,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8f,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8f,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8f] +0x01,0xc1,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8f] +0x01,0xf7,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8f] +0x01,0x7f,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8f] +0x01,0x7e,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8f] +0x01,0x7d,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8f] +0x01,0x67,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8f] +0x01,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8f] +0x01,0x6b,0x00,0x8f + +# GFX11: s_nor_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8f] +0x01,0x6a,0x00,0x8f + +# GFX11: s_nor_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8f] +0x6b,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8f] +0x6a,0x02,0x00,0x8f + +# GFX11: s_nor_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8f] +0x68,0x67,0x69,0x8f + +# GFX11: s_nor_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x8f] +0x68,0x02,0x69,0x8f + +# GFX11: s_nor_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8f] +0x01,0x67,0x69,0x8f + +# GFX11: s_nor_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8f] +0x01,0x02,0x69,0x8f + +# GFX11: s_nor_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8f] +0x01,0x02,0x6b,0x8f + +# GFX11: s_nor_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8f] +0x01,0x02,0x6a,0x8f + +# GFX11: s_nor_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x8f] +0x02,0x04,0xfe,0x8f + +# GFX11: s_nor_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x8f] +0xf0,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x8f] +0x80,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x8f,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8f] 
+0xc1,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x8f] +0xf7,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x8f] +0x7e,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x8f] +0x66,0x64,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x8f] +0x66,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x8f] +0x02,0xf0,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x8f] +0x02,0x80,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x8f,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x8f,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8f] +0x02,0xc1,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x8f] +0x02,0xf7,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8f] +0x02,0x7e,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x8f] +0x02,0x64,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x8f] +0x02,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x8f] +0x02,0x6a,0x80,0x8f + +# GFX11: s_nor_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x8f] +0x6a,0x04,0x80,0x8f + +# GFX11: s_nor_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x8f] +0x66,0x64,0xe8,0x8f + +# GFX11: s_nor_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x8f] +0x66,0x04,0xe8,0x8f + +# GFX11: s_nor_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x8f] +0x02,0x64,0xe8,0x8f + +# GFX11: s_nor_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x8f] +0x02,0x04,0xe8,0x8f + +# GFX11: s_nor_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x8f] +0x02,0x04,0xea,0x8f + +# GFX11: s_nor_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x28,0x80,0xbe] +0xf0,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, 0 ; encoding: [0x80,0x28,0x80,0xbe] +0x80,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x28,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x28,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x28,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x28,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_saveexec_b32 s0, -1 ; encoding: [0xc1,0x28,0x80,0xbe] +0xc1,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x28,0x80,0xbe] +0xf7,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x28,0x80,0xbe] +0x7f,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x28,0x80,0xbe] +0x7e,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, m0 ; encoding: [0x7d,0x28,0x80,0xbe] +0x7d,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, s104 ; encoding: [0x68,0x28,0x80,0xbe] +0x68,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, s1 ; encoding: [0x01,0x28,0x80,0xbe] +0x01,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x28,0x80,0xbe] +0x6b,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x28,0x80,0xbe] +0x6a,0x28,0x80,0xbe + +# GFX11: s_nor_saveexec_b32 s105, s104 ; encoding: [0x68,0x28,0xe9,0xbe] +0x68,0x28,0xe9,0xbe + 
+# GFX11: s_nor_saveexec_b32 s105, s1 ; encoding: [0x01,0x28,0xe9,0xbe] +0x01,0x28,0xe9,0xbe + +# GFX11: s_nor_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x28,0xeb,0xbe] +0x01,0x28,0xeb,0xbe + +# GFX11: s_nor_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x28,0xea,0xbe] +0x01,0x28,0xea,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x29,0x80,0xbe] +0xf0,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x29,0x80,0xbe] +0x80,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x29,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x29,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_nor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x29,0x80,0xbe] +0xc1,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x29,0x80,0xbe] +0xf7,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x29,0x80,0xbe] +0x7e,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x29,0x80,0xbe] +0x66,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x29,0x80,0xbe] +0x02,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x29,0x80,0xbe] +0x6a,0x29,0x80,0xbe + +# GFX11: s_nor_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x29,0xe8,0xbe] +0x66,0x29,0xe8,0xbe + +# GFX11: s_nor_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x29,0xe8,0xbe] +0x02,0x29,0xe8,0xbe + +# GFX11: s_nor_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x29,0xea,0xbe] +0x02,0x29,0xea,0xbe + +# GFX11: s_not_b32 exec_hi, s1 ; encoding: [0x01,0x1e,0xff,0xbe] +0x01,0x1e,0xff,0xbe + +# GFX11: s_not_b32 exec_lo, s1 ; encoding: [0x01,0x1e,0xfe,0xbe] +0x01,0x1e,0xfe,0xbe + +# GFX11: s_not_b32 m0, s1 ; encoding: [0x01,0x1e,0xfd,0xbe] +0x01,0x1e,0xfd,0xbe + +# GFX11: s_not_b32 s0, 0.5 ; encoding: [0xf0,0x1e,0x80,0xbe] +0xf0,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, 0 ; encoding: [0x80,0x1e,0x80,0xbe] +0x80,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, 0x3f717273 ; encoding: [0xff,0x1e,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1e,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_not_b32 s0, 0xaf123456 ; encoding: [0xff,0x1e,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1e,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_not_b32 s0, -1 ; encoding: [0xc1,0x1e,0x80,0xbe] +0xc1,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, -4.0 ; encoding: [0xf7,0x1e,0x80,0xbe] +0xf7,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, exec_hi ; encoding: [0x7f,0x1e,0x80,0xbe] +0x7f,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, exec_lo ; encoding: [0x7e,0x1e,0x80,0xbe] +0x7e,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, m0 ; encoding: [0x7d,0x1e,0x80,0xbe] +0x7d,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, s104 ; encoding: [0x68,0x1e,0x80,0xbe] +0x68,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, s1 ; encoding: [0x01,0x1e,0x80,0xbe] +0x01,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, vcc_hi ; encoding: [0x6b,0x1e,0x80,0xbe] +0x6b,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s0, vcc_lo ; encoding: [0x6a,0x1e,0x80,0xbe] +0x6a,0x1e,0x80,0xbe + +# GFX11: s_not_b32 s105, s104 ; encoding: [0x68,0x1e,0xe9,0xbe] +0x68,0x1e,0xe9,0xbe + +# GFX11: s_not_b32 s105, s1 ; encoding: [0x01,0x1e,0xe9,0xbe] +0x01,0x1e,0xe9,0xbe + +# GFX11: s_not_b32 vcc_hi, s1 ; encoding: [0x01,0x1e,0xeb,0xbe] +0x01,0x1e,0xeb,0xbe + +# GFX11: s_not_b32 vcc_lo, s1 ; encoding: [0x01,0x1e,0xea,0xbe] +0x01,0x1e,0xea,0xbe + +# GFX11: s_not_b64 exec, s[2:3] ; encoding: 
[0x02,0x1f,0xfe,0xbe] +0x02,0x1f,0xfe,0xbe + +# GFX11: s_not_b64 s[0:1], 0.5 ; encoding: [0xf0,0x1f,0x80,0xbe] +0xf0,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], 0 ; encoding: [0x80,0x1f,0x80,0xbe] +0x80,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x1f,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1f,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_not_b64 s[0:1], -1 ; encoding: [0xc1,0x1f,0x80,0xbe] +0xc1,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], -4.0 ; encoding: [0xf7,0x1f,0x80,0xbe] +0xf7,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], exec ; encoding: [0x7e,0x1f,0x80,0xbe] +0x7e,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], s[102:103] ; encoding: [0x66,0x1f,0x80,0xbe] +0x66,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], s[2:3] ; encoding: [0x02,0x1f,0x80,0xbe] +0x02,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[0:1], vcc ; encoding: [0x6a,0x1f,0x80,0xbe] +0x6a,0x1f,0x80,0xbe + +# GFX11: s_not_b64 s[104:105], s[102:103] ; encoding: [0x66,0x1f,0xe8,0xbe] +0x66,0x1f,0xe8,0xbe + +# GFX11: s_not_b64 s[104:105], s[2:3] ; encoding: [0x02,0x1f,0xe8,0xbe] +0x02,0x1f,0xe8,0xbe + +# GFX11: s_not_b64 vcc, s[2:3] ; encoding: [0x02,0x1f,0xea,0xbe] +0x02,0x1f,0xea,0xbe + +# GFX11: s_or_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8c] +0x01,0x02,0x7f,0x8c + +# GFX11: s_or_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8c] +0x01,0x02,0x7e,0x8c + +# GFX11: s_or_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8c] +0x01,0x02,0x7d,0x8c + +# GFX11: s_or_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8c] +0xf0,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8c] +0x80,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8c,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8c,0x73,0x72,0x71,0x3f + +# GFX11: s_or_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8c,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8c,0x56,0x34,0x12,0xaf + +# GFX11: s_or_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8c] +0xc1,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8c] +0xf7,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8c] +0x7f,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8c] +0x7e,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8c] +0x7d,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8c] +0x68,0x67,0x00,0x8c + +# GFX11: s_or_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8c] +0x68,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8c] +0x01,0xf0,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8c] +0x01,0x80,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x8c,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8c,0x73,0x72,0x71,0x3f + +# GFX11: s_or_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8c,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8c,0x56,0x34,0x12,0xaf + +# GFX11: s_or_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8c] +0x01,0xc1,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8c] +0x01,0xf7,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8c] +0x01,0x7f,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8c] +0x01,0x7e,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8c] +0x01,0x7d,0x00,0x8c 
+ +# GFX11: s_or_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8c] +0x01,0x67,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8c] +0x01,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8c] +0x01,0x6b,0x00,0x8c + +# GFX11: s_or_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8c] +0x01,0x6a,0x00,0x8c + +# GFX11: s_or_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8c] +0x6b,0x02,0x00,0x8c + +# GFX11: s_or_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8c] +0x6a,0x02,0x00,0x8c + +# GFX11: s_or_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8c] +0x68,0x67,0x69,0x8c + +# GFX11: s_or_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x8c] +0x68,0x02,0x69,0x8c + +# GFX11: s_or_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8c] +0x01,0x67,0x69,0x8c + +# GFX11: s_or_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8c] +0x01,0x02,0x69,0x8c + +# GFX11: s_or_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8c] +0x01,0x02,0x6b,0x8c + +# GFX11: s_or_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8c] +0x01,0x02,0x6a,0x8c + +# GFX11: s_or_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x8c] +0x02,0x04,0xfe,0x8c + +# GFX11: s_or_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x8c] +0xf0,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x8c] +0x80,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x8c,0x73,0x72,0x71,0x3f + +# GFX11: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf + +# GFX11: s_or_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8c] +0xc1,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x8c] +0xf7,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x8c] +0x7e,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x8c] +0x66,0x64,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x8c] +0x66,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x8c] +0x02,0xf0,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x8c] +0x02,0x80,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x8c,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x8c,0x73,0x72,0x71,0x3f + +# GFX11: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf + +# GFX11: s_or_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8c] +0x02,0xc1,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x8c] +0x02,0xf7,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8c] +0x02,0x7e,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x8c] +0x02,0x64,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x8c] +0x02,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x8c] +0x02,0x6a,0x80,0x8c + +# GFX11: s_or_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x8c] +0x6a,0x04,0x80,0x8c + +# GFX11: s_or_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x8c] +0x66,0x64,0xe8,0x8c + +# GFX11: s_or_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x8c] +0x66,0x04,0xe8,0x8c + +# GFX11: 
s_or_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x8c] +0x02,0x64,0xe8,0x8c + +# GFX11: s_or_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x8c] +0x02,0x04,0xe8,0x8c + +# GFX11: s_or_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x8c] +0x02,0x04,0xea,0x8c + +# GFX11: s_or_not0_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x2e,0x80,0xbe] +0xf0,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, 0 ; encoding: [0x80,0x2e,0x80,0xbe] +0x80,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x2e,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2e,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not0_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x2e,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2e,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not0_saveexec_b32 s0, -1 ; encoding: [0xc1,0x2e,0x80,0xbe] +0xc1,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x2e,0x80,0xbe] +0xf7,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x2e,0x80,0xbe] +0x7f,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x2e,0x80,0xbe] +0x7e,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, m0 ; encoding: [0x7d,0x2e,0x80,0xbe] +0x7d,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, s104 ; encoding: [0x68,0x2e,0x80,0xbe] +0x68,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, s1 ; encoding: [0x01,0x2e,0x80,0xbe] +0x01,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x2e,0x80,0xbe] +0x6b,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x2e,0x80,0xbe] +0x6a,0x2e,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b32 s105, s104 ; encoding: [0x68,0x2e,0xe9,0xbe] +0x68,0x2e,0xe9,0xbe + +# GFX11: s_or_not0_saveexec_b32 s105, s1 ; encoding: [0x01,0x2e,0xe9,0xbe] +0x01,0x2e,0xe9,0xbe + +# GFX11: s_or_not0_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x2e,0xeb,0xbe] +0x01,0x2e,0xeb,0xbe + +# GFX11: s_or_not0_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x2e,0xea,0xbe] +0x01,0x2e,0xea,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x2f,0x80,0xbe] +0xf0,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x2f,0x80,0xbe] +0x80,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x2f,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2f,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2f,0x80,0xbe] +0xc1,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x2f,0x80,0xbe] +0xf7,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x2f,0x80,0xbe] +0x7e,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x2f,0x80,0xbe] +0x66,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2f,0x80,0xbe] +0x02,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x2f,0x80,0xbe] +0x6a,0x2f,0x80,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x2f,0xe8,0xbe] +0x66,0x2f,0xe8,0xbe + +# GFX11: s_or_not0_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x2f,0xe8,0xbe] +0x02,0x2f,0xe8,0xbe + +# GFX11: s_or_not0_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x2f,0xea,0xbe] +0x02,0x2f,0xea,0xbe + +# GFX11: s_or_not1_b32 
exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x92] +0x01,0x02,0x7f,0x92 + +# GFX11: s_or_not1_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x92] +0x01,0x02,0x7e,0x92 + +# GFX11: s_or_not1_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x92] +0x01,0x02,0x7d,0x92 + +# GFX11: s_or_not1_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x92] +0xf0,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x92] +0x80,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x92,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x92,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x92,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x92,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x92] +0xc1,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x92] +0xf7,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x92] +0x7f,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x92] +0x7e,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x92] +0x7d,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x92] +0x68,0x67,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x92] +0x68,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x92] +0x01,0xf0,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x92] +0x01,0x80,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x92,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x92,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x92,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x92,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x92] +0x01,0xc1,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x92] +0x01,0xf7,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x92] +0x01,0x7f,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x92] +0x01,0x7e,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x92] +0x01,0x7d,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x92] +0x01,0x67,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x92] +0x01,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x92] +0x01,0x6b,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x92] +0x01,0x6a,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x92] +0x6b,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x92] +0x6a,0x02,0x00,0x92 + +# GFX11: s_or_not1_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x92] +0x68,0x67,0x69,0x92 + +# GFX11: s_or_not1_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x92] +0x68,0x02,0x69,0x92 + +# GFX11: s_or_not1_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x92] +0x01,0x67,0x69,0x92 + +# GFX11: s_or_not1_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x92] +0x01,0x02,0x69,0x92 + +# GFX11: s_or_not1_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x92] +0x01,0x02,0x6b,0x92 + +# GFX11: s_or_not1_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x92] +0x01,0x02,0x6a,0x92 + +# GFX11: s_or_not1_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x92] 
+0x02,0x04,0xfe,0x92 + +# GFX11: s_or_not1_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x92] +0xf0,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x92] +0x80,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x92,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x92] +0xc1,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x92] +0xf7,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x92] +0x7e,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x92] +0x66,0x64,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x92] +0x66,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x92] +0x02,0xf0,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x92] +0x02,0x80,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x92,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x92,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x92] +0x02,0xc1,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x92] +0x02,0xf7,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x92] +0x02,0x7e,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x92] +0x02,0x64,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x92] +0x02,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x92] +0x02,0x6a,0x80,0x92 + +# GFX11: s_or_not1_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x92] +0x6a,0x04,0x80,0x92 + +# GFX11: s_or_not1_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x92] +0x66,0x64,0xe8,0x92 + +# GFX11: s_or_not1_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x92] +0x66,0x04,0xe8,0x92 + +# GFX11: s_or_not1_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x92] +0x02,0x64,0xe8,0x92 + +# GFX11: s_or_not1_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x92] +0x02,0x04,0xe8,0x92 + +# GFX11: s_or_not1_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x92] +0x02,0x04,0xea,0x92 + +# GFX11: s_or_not1_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x32,0x80,0xbe] +0xf0,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, 0 ; encoding: [0x80,0x32,0x80,0xbe] +0x80,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x32,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x32,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x32,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x32,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_saveexec_b32 s0, -1 ; encoding: [0xc1,0x32,0x80,0xbe] +0xc1,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x32,0x80,0xbe] +0xf7,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, exec_hi ; encoding: 
[0x7f,0x32,0x80,0xbe] +0x7f,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x32,0x80,0xbe] +0x7e,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, m0 ; encoding: [0x7d,0x32,0x80,0xbe] +0x7d,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, s104 ; encoding: [0x68,0x32,0x80,0xbe] +0x68,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, s1 ; encoding: [0x01,0x32,0x80,0xbe] +0x01,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x32,0x80,0xbe] +0x6b,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x32,0x80,0xbe] +0x6a,0x32,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b32 s105, s104 ; encoding: [0x68,0x32,0xe9,0xbe] +0x68,0x32,0xe9,0xbe + +# GFX11: s_or_not1_saveexec_b32 s105, s1 ; encoding: [0x01,0x32,0xe9,0xbe] +0x01,0x32,0xe9,0xbe + +# GFX11: s_or_not1_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x32,0xeb,0xbe] +0x01,0x32,0xeb,0xbe + +# GFX11: s_or_not1_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x32,0xea,0xbe] +0x01,0x32,0xea,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x33,0x80,0xbe] +0xf0,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x33,0x80,0xbe] +0x80,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x33,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x33,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x33,0x80,0xbe] +0xc1,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x33,0x80,0xbe] +0xf7,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x33,0x80,0xbe] +0x7e,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x33,0x80,0xbe] +0x66,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x33,0x80,0xbe] +0x02,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x33,0x80,0xbe] +0x6a,0x33,0x80,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x33,0xe8,0xbe] +0x66,0x33,0xe8,0xbe + +# GFX11: s_or_not1_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x33,0xe8,0xbe] +0x02,0x33,0xe8,0xbe + +# GFX11: s_or_not1_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x33,0xea,0xbe] +0x02,0x33,0xea,0xbe + +# GFX11: s_or_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x22,0x80,0xbe] +0xf0,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, 0 ; encoding: [0x80,0x22,0x80,0xbe] +0x80,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x22,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x22,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x22,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x22,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_saveexec_b32 s0, -1 ; encoding: [0xc1,0x22,0x80,0xbe] +0xc1,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x22,0x80,0xbe] +0xf7,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x22,0x80,0xbe] +0x7f,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x22,0x80,0xbe] +0x7e,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, m0 ; encoding: [0x7d,0x22,0x80,0xbe] +0x7d,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, s104 ; encoding: [0x68,0x22,0x80,0xbe] +0x68,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, 
s1 ; encoding: [0x01,0x22,0x80,0xbe] +0x01,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x22,0x80,0xbe] +0x6b,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x22,0x80,0xbe] +0x6a,0x22,0x80,0xbe + +# GFX11: s_or_saveexec_b32 s105, s104 ; encoding: [0x68,0x22,0xe9,0xbe] +0x68,0x22,0xe9,0xbe + +# GFX11: s_or_saveexec_b32 s105, s1 ; encoding: [0x01,0x22,0xe9,0xbe] +0x01,0x22,0xe9,0xbe + +# GFX11: s_or_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x22,0xeb,0xbe] +0x01,0x22,0xeb,0xbe + +# GFX11: s_or_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x22,0xea,0xbe] +0x01,0x22,0xea,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x23,0x80,0xbe] +0xf0,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x23,0x80,0xbe] +0x80,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x23,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x23,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_or_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x23,0x80,0xbe] +0xc1,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x23,0x80,0xbe] +0xf7,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x23,0x80,0xbe] +0x7e,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x23,0x80,0xbe] +0x66,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x23,0x80,0xbe] +0x02,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x23,0x80,0xbe] +0x6a,0x23,0x80,0xbe + +# GFX11: s_or_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x23,0xe8,0xbe] +0x66,0x23,0xe8,0xbe + +# GFX11: s_or_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x23,0xe8,0xbe] +0x02,0x23,0xe8,0xbe + +# GFX11: s_or_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x23,0xea,0xbe] +0x02,0x23,0xea,0xbe + +# GFX11: s_pack_hh_b32_b16 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x9a] +0x01,0x02,0x7f,0x9a + +# GFX11: s_pack_hh_b32_b16 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x9a] +0x01,0x02,0x7e,0x9a + +# GFX11: s_pack_hh_b32_b16 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x9a] +0x01,0x02,0x7d,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x9a] +0xf0,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x9a] +0x80,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x9a,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x9a,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_hh_b32_b16 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x9a,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x9a,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_hh_b32_b16 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x9a] +0xc1,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x9a] +0xf7,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x9a] +0x7f,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x9a] +0x7e,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x9a] +0x7d,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x9a] +0x68,0x67,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x9a] +0x68,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, 0.5 ; encoding: 
[0x01,0xf0,0x00,0x9a] +0x01,0xf0,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x9a] +0x01,0x80,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x9a,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x9a,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_hh_b32_b16 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x9a,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x9a,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_hh_b32_b16 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x9a] +0x01,0xc1,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x9a] +0x01,0xf7,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x9a] +0x01,0x7f,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x9a] +0x01,0x7e,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x9a] +0x01,0x7d,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x9a] +0x01,0x67,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x9a] +0x01,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x9a] +0x01,0x6b,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x9a] +0x01,0x6a,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x9a] +0x6b,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x9a] +0x6a,0x02,0x00,0x9a + +# GFX11: s_pack_hh_b32_b16 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x9a] +0x68,0x67,0x69,0x9a + +# GFX11: s_pack_hh_b32_b16 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x9a] +0x68,0x02,0x69,0x9a + +# GFX11: s_pack_hh_b32_b16 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x9a] +0x01,0x67,0x69,0x9a + +# GFX11: s_pack_hh_b32_b16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x9a] +0x01,0x02,0x69,0x9a + +# GFX11: s_pack_hh_b32_b16 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x9a] +0x01,0x02,0x6b,0x9a + +# GFX11: s_pack_hh_b32_b16 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x9a] +0x01,0x02,0x6a,0x9a + +# GFX11: s_pack_lh_b32_b16 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x99] +0x01,0x02,0xff,0x99 + +# GFX11: s_pack_lh_b32_b16 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x99] +0x01,0x02,0xfe,0x99 + +# GFX11: s_pack_lh_b32_b16 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x99] +0x01,0x02,0xfd,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x99] +0xf0,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x99] +0x80,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x99,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x99,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_lh_b32_b16 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x99,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x99,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_lh_b32_b16 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x99] +0xc1,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x99] +0xf7,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x99] +0x7f,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x99] +0x7e,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x99] +0x7d,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x99] +0x68,0x67,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s104, s2 ; encoding: 
[0x68,0x02,0x80,0x99] +0x68,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x99] +0x01,0xf0,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x99] +0x01,0x80,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x99,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x99,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_lh_b32_b16 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x99,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x99,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_lh_b32_b16 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x99] +0x01,0xc1,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x99] +0x01,0xf7,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x99] +0x01,0x7f,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x99] +0x01,0x7e,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x99] +0x01,0x7d,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x99] +0x01,0x67,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x99] +0x01,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x99] +0x01,0x6b,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x99] +0x01,0x6a,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x99] +0x6b,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x99] +0x6a,0x02,0x80,0x99 + +# GFX11: s_pack_lh_b32_b16 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x99] +0x68,0x67,0xe9,0x99 + +# GFX11: s_pack_lh_b32_b16 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x99] +0x68,0x02,0xe9,0x99 + +# GFX11: s_pack_lh_b32_b16 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x99] +0x01,0x67,0xe9,0x99 + +# GFX11: s_pack_lh_b32_b16 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x99] +0x01,0x02,0xe9,0x99 + +# GFX11: s_pack_lh_b32_b16 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x99] +0x01,0x02,0xeb,0x99 + +# GFX11: s_pack_lh_b32_b16 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x99] +0x01,0x02,0xea,0x99 + +# GFX11: s_pack_ll_b32_b16 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x99] +0x01,0x02,0x7f,0x99 + +# GFX11: s_pack_ll_b32_b16 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x99] +0x01,0x02,0x7e,0x99 + +# GFX11: s_pack_ll_b32_b16 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x99] +0x01,0x02,0x7d,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x99] +0xf0,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x99] +0x80,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x99,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x99,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_ll_b32_b16 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x99,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x99,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_ll_b32_b16 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x99] +0xc1,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x99] +0xf7,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x99] +0x7f,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x99] +0x7e,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x99] +0x7d,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s104, s103 ; encoding: 
[0x68,0x67,0x00,0x99] +0x68,0x67,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x99] +0x68,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x99] +0x01,0xf0,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x99] +0x01,0x80,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x99,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x99,0x73,0x72,0x71,0x3f + +# GFX11: s_pack_ll_b32_b16 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x99,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x99,0x56,0x34,0x12,0xaf + +# GFX11: s_pack_ll_b32_b16 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x99] +0x01,0xc1,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x99] +0x01,0xf7,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x99] +0x01,0x7f,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x99] +0x01,0x7e,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x99] +0x01,0x7d,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x99] +0x01,0x67,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x99] +0x01,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x99] +0x01,0x6b,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x99] +0x01,0x6a,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x99] +0x6b,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x99] +0x6a,0x02,0x00,0x99 + +# GFX11: s_pack_ll_b32_b16 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x99] +0x68,0x67,0x69,0x99 + +# GFX11: s_pack_ll_b32_b16 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x99] +0x68,0x02,0x69,0x99 + +# GFX11: s_pack_ll_b32_b16 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x99] +0x01,0x67,0x69,0x99 + +# GFX11: s_pack_ll_b32_b16 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x99] +0x01,0x02,0x69,0x99 + +# GFX11: s_pack_ll_b32_b16 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x99] +0x01,0x02,0x6b,0x99 + +# GFX11: s_pack_ll_b32_b16 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x99] +0x01,0x02,0x6a,0x99 + +# GFX11: s_quadmask_b32 exec_hi, s1 ; encoding: [0x01,0x1a,0xff,0xbe] +0x01,0x1a,0xff,0xbe + +# GFX11: s_quadmask_b32 exec_lo, s1 ; encoding: [0x01,0x1a,0xfe,0xbe] +0x01,0x1a,0xfe,0xbe + +# GFX11: s_quadmask_b32 m0, s1 ; encoding: [0x01,0x1a,0xfd,0xbe] +0x01,0x1a,0xfd,0xbe + +# GFX11: s_quadmask_b32 s0, 0.5 ; encoding: [0xf0,0x1a,0x80,0xbe] +0xf0,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, 0 ; encoding: [0x80,0x1a,0x80,0xbe] +0x80,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, 0x3f717273 ; encoding: [0xff,0x1a,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1a,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_quadmask_b32 s0, 0xaf123456 ; encoding: [0xff,0x1a,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1a,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_quadmask_b32 s0, -1 ; encoding: [0xc1,0x1a,0x80,0xbe] +0xc1,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, -4.0 ; encoding: [0xf7,0x1a,0x80,0xbe] +0xf7,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, exec_hi ; encoding: [0x7f,0x1a,0x80,0xbe] +0x7f,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, exec_lo ; encoding: [0x7e,0x1a,0x80,0xbe] +0x7e,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, m0 ; encoding: [0x7d,0x1a,0x80,0xbe] +0x7d,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, s104 ; encoding: 
[0x68,0x1a,0x80,0xbe] +0x68,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, s1 ; encoding: [0x01,0x1a,0x80,0xbe] +0x01,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, vcc_hi ; encoding: [0x6b,0x1a,0x80,0xbe] +0x6b,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s0, vcc_lo ; encoding: [0x6a,0x1a,0x80,0xbe] +0x6a,0x1a,0x80,0xbe + +# GFX11: s_quadmask_b32 s105, s104 ; encoding: [0x68,0x1a,0xe9,0xbe] +0x68,0x1a,0xe9,0xbe + +# GFX11: s_quadmask_b32 s105, s1 ; encoding: [0x01,0x1a,0xe9,0xbe] +0x01,0x1a,0xe9,0xbe + +# GFX11: s_quadmask_b32 vcc_hi, s1 ; encoding: [0x01,0x1a,0xeb,0xbe] +0x01,0x1a,0xeb,0xbe + +# GFX11: s_quadmask_b32 vcc_lo, s1 ; encoding: [0x01,0x1a,0xea,0xbe] +0x01,0x1a,0xea,0xbe + +# GFX11: s_quadmask_b64 exec, s[2:3] ; encoding: [0x02,0x1b,0xfe,0xbe] +0x02,0x1b,0xfe,0xbe + +# GFX11: s_quadmask_b64 s[0:1], 0.5 ; encoding: [0xf0,0x1b,0x80,0xbe] +0xf0,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], 0 ; encoding: [0x80,0x1b,0x80,0xbe] +0x80,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x1b,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1b,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_quadmask_b64 s[0:1], -1 ; encoding: [0xc1,0x1b,0x80,0xbe] +0xc1,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], -4.0 ; encoding: [0xf7,0x1b,0x80,0xbe] +0xf7,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], exec ; encoding: [0x7e,0x1b,0x80,0xbe] +0x7e,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], s[102:103] ; encoding: [0x66,0x1b,0x80,0xbe] +0x66,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], s[2:3] ; encoding: [0x02,0x1b,0x80,0xbe] +0x02,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[0:1], vcc ; encoding: [0x6a,0x1b,0x80,0xbe] +0x6a,0x1b,0x80,0xbe + +# GFX11: s_quadmask_b64 s[104:105], s[102:103] ; encoding: [0x66,0x1b,0xe8,0xbe] +0x66,0x1b,0xe8,0xbe + +# GFX11: s_quadmask_b64 s[104:105], s[2:3] ; encoding: [0x02,0x1b,0xe8,0xbe] +0x02,0x1b,0xe8,0xbe + +# GFX11: s_quadmask_b64 vcc, s[2:3] ; encoding: [0x02,0x1b,0xea,0xbe] +0x02,0x1b,0xea,0xbe + +# GFX11: s_rfe_b64 s[0:1] ; encoding: [0x00,0x4a,0x80,0xbe] +0x00,0x4a,0x80,0xbe + +# GFX11: s_rfe_b64 s[104:105] ; encoding: [0x68,0x4a,0x80,0xbe] +0x68,0x4a,0x80,0xbe + +# GFX11: s_rfe_b64 vcc ; encoding: [0x6a,0x4a,0x80,0xbe] +0x6a,0x4a,0x80,0xbe + +# GFX11: s_round_mode 0x0 ; encoding: [0x00,0x00,0x91,0xbf] +0x00,0x00,0x91,0xbf + +# GFX11: s_round_mode 0x1234 ; encoding: [0x34,0x12,0x91,0xbf] +0x34,0x12,0x91,0xbf + +# GFX11: s_round_mode 0xc1d1 ; encoding: [0xd1,0xc1,0x91,0xbf] +0xd1,0xc1,0x91,0xbf + +# GFX11: s_sendmsg 4660 ; encoding: [0x34,0x12,0xb6,0xbf] +0x34,0x12,0xb6,0xbf + +# GFX11: s_sendmsg 49617 ; encoding: [0xd1,0xc1,0xb6,0xbf] +0xd1,0xc1,0xb6,0xbf + +# GFX11: s_sendmsghalt 4660 ; encoding: [0x34,0x12,0xb7,0xbf] +0x34,0x12,0xb7,0xbf + +# GFX11: s_sendmsghalt 49617 ; encoding: [0xd1,0xc1,0xb7,0xbf] +0xd1,0xc1,0xb7,0xbf + +# GFX11: s_sendmsghalt sendmsg(0, 0, 0) ; encoding: [0x00,0x00,0xb7,0xbf] +0x00,0x00,0xb7,0xbf + +# GFX11: s_sendmsg sendmsg(0, 0, 0) ; encoding: [0x00,0x00,0xb6,0xbf] +0x00,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_GET_DOORBELL) ; encoding: [0x80,0x00,0xb6,0xbf] +0x80,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_GET_DDID) ; encoding: [0x81,0x00,0xb6,0xbf] +0x81,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_GET_TMA) ; encoding: [0x82,0x00,0xb6,0xbf] +0x82,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_GET_REALTIME) ; encoding: 
[0x83,0x00,0xb6,0xbf] +0x83,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_SAVE_WAVE) ; encoding: [0x84,0x00,0xb6,0xbf] +0x84,0x00,0xb6,0xbf + +# GFX11: s_sendmsg sendmsg(MSG_RTN_GET_TBA) ; encoding: [0x85,0x00,0xb6,0xbf] +0x85,0x00,0xb6,0xbf + +# GFX11: s_sendmsg_rtn_b32 s1, sendmsg(0, 0, 0) ; encoding: [0x00,0x4c,0x81,0xbe] +0x00,0x4c,0x81,0xbe + +# GFX11: s_sendmsg_rtn_b32 s2, sendmsg(18, 0, 0) ; encoding: [0x12,0x4c,0x82,0xbe] +0x12,0x4c,0x82,0xbe + +# GFX11: s_sendmsg_rtn_b32 s3, sendmsg(255, 0, 0) ; encoding: [0xff,0x4c,0x83,0xbe] +0xff,0x4c,0x83,0xbe + +# GFX11: s_sendmsg_rtn_b64 s[0:1], sendmsg(0, 0, 0) ; encoding: [0x00,0x4d,0x80,0xbe] +0x00,0x4d,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b64 s[2:3], sendmsg(18, 0, 0) ; encoding: [0x12,0x4d,0x82,0xbe] +0x12,0x4d,0x82,0xbe + +# GFX11: s_sendmsg_rtn_b64 s[4:5], sendmsg(255, 0, 0) ; encoding: [0xff,0x4d,0x84,0xbe] +0xff,0x4d,0x84,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL) ; encoding: [0x80,0x4c,0x80,0xbe] +0x80,0x4c,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DDID) ; encoding: [0x81,0x4c,0x80,0xbe] +0x81,0x4c,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TMA) ; encoding: [0x82,0x4c,0x80,0xbe] +0x82,0x4c,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_REALTIME) ; encoding: [0x83,0x4c,0x80,0xbe] +0x83,0x4c,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_SAVE_WAVE) ; encoding: [0x84,0x4c,0x80,0xbe] +0x84,0x4c,0x80,0xbe + +# GFX11: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_TBA) ; encoding: [0x85,0x4c,0x80,0xbe] +0x85,0x4c,0x80,0xbe + +# GFX11: s_sethalt 0 ; encoding: [0x00,0x00,0x82,0xbf] +0x00,0x00,0x82,0xbf + +# GFX11: s_sethalt 0x1234 ; encoding: [0x34,0x12,0x82,0xbf] +0x34,0x12,0x82,0xbf + +# GFX11: s_sethalt 0xc1d1 ; encoding: [0xd1,0xc1,0x82,0xbf] +0xd1,0xc1,0x82,0xbf + +# GFX11: s_set_inst_prefetch_distance 0x0 ; encoding: [0x00,0x00,0x84,0xbf] +0x00,0x00,0x84,0xbf + +# GFX11: s_set_inst_prefetch_distance 0x1234 ; encoding: [0x34,0x12,0x84,0xbf] +0x34,0x12,0x84,0xbf + +# GFX11: s_set_inst_prefetch_distance 0xc1d1 ; encoding: [0xd1,0xc1,0x84,0xbf] +0xd1,0xc1,0x84,0xbf + +# GFX11: s_setkill 0 ; encoding: [0x00,0x00,0x81,0xbf] +0x00,0x00,0x81,0xbf + +# GFX11: s_setkill 0x1234 ; encoding: [0x34,0x12,0x81,0xbf] +0x34,0x12,0x81,0xbf + +# GFX11: s_setkill 0xc1d1 ; encoding: [0xd1,0xc1,0x81,0xbf] +0xd1,0xc1,0x81,0xbf + +# GFX11: s_setpc_b64 s[0:1] ; encoding: [0x00,0x48,0x80,0xbe] +0x00,0x48,0x80,0xbe + +# GFX11: s_setpc_b64 s[104:105] ; encoding: [0x68,0x48,0x80,0xbe] +0x68,0x48,0x80,0xbe + +# GFX11: s_setpc_b64 vcc ; encoding: [0x6a,0x48,0x80,0xbe] +0x6a,0x48,0x80,0xbe + +# GFX11: s_setprio 0 ; encoding: [0x00,0x00,0xb5,0xbf] +0x00,0x00,0xb5,0xbf + +# GFX11: s_setprio 0x1234 ; encoding: [0x34,0x12,0xb5,0xbf] +0x34,0x12,0xb5,0xbf + +# GFX11: s_setprio 0xc1d1 ; encoding: [0xd1,0xc1,0xb5,0xbf] +0xd1,0xc1,0xb5,0xbf + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), exec_hi ; encoding: [0x34,0x12,0x7f,0xb9] +0x34,0x12,0x7f,0xb9 + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), exec_lo ; encoding: [0x34,0x12,0x7e,0xb9] +0x34,0x12,0x7e,0xb9 + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), m0 ; encoding: [0x34,0x12,0x7d,0xb9] +0x34,0x12,0x7d,0xb9 + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), s0 ; encoding: [0x34,0x12,0x00,0xb9] +0x34,0x12,0x00,0xb9 + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), s105 ; encoding: [0x34,0x12,0x69,0xb9] +0x34,0x12,0x69,0xb9 + +# GFX11: s_setreg_b32 hwreg(52, 8, 3), vcc_hi ; encoding: [0x34,0x12,0x6b,0xb9] +0x34,0x12,0x6b,0xb9 + +# GFX11: s_setreg_b32 
hwreg(52, 8, 3), vcc_lo ; encoding: [0x34,0x12,0x6a,0xb9] +0x34,0x12,0x6a,0xb9 + +# GFX11: s_setreg_b32 hwreg(17, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9] +0xd1,0xc1,0x00,0xb9 + +# GFX11: s_sext_i32_i16 exec_hi, s1 ; encoding: [0x01,0x0f,0xff,0xbe] +0x01,0x0f,0xff,0xbe + +# GFX11: s_sext_i32_i16 exec_lo, s1 ; encoding: [0x01,0x0f,0xfe,0xbe] +0x01,0x0f,0xfe,0xbe + +# GFX11: s_sext_i32_i16 m0, s1 ; encoding: [0x01,0x0f,0xfd,0xbe] +0x01,0x0f,0xfd,0xbe + +# GFX11: s_sext_i32_i16 s0, 0.5 ; encoding: [0xf0,0x0f,0x80,0xbe] +0xf0,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, 0 ; encoding: [0x80,0x0f,0x80,0xbe] +0x80,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, 0x3f717273 ; encoding: [0xff,0x0f,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0f,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_sext_i32_i16 s0, 0xaf123456 ; encoding: [0xff,0x0f,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0f,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_sext_i32_i16 s0, -1 ; encoding: [0xc1,0x0f,0x80,0xbe] +0xc1,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, -4.0 ; encoding: [0xf7,0x0f,0x80,0xbe] +0xf7,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, exec_hi ; encoding: [0x7f,0x0f,0x80,0xbe] +0x7f,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, exec_lo ; encoding: [0x7e,0x0f,0x80,0xbe] +0x7e,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, m0 ; encoding: [0x7d,0x0f,0x80,0xbe] +0x7d,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, s104 ; encoding: [0x68,0x0f,0x80,0xbe] +0x68,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, s1 ; encoding: [0x01,0x0f,0x80,0xbe] +0x01,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, vcc_hi ; encoding: [0x6b,0x0f,0x80,0xbe] +0x6b,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s0, vcc_lo ; encoding: [0x6a,0x0f,0x80,0xbe] +0x6a,0x0f,0x80,0xbe + +# GFX11: s_sext_i32_i16 s105, s104 ; encoding: [0x68,0x0f,0xe9,0xbe] +0x68,0x0f,0xe9,0xbe + +# GFX11: s_sext_i32_i16 s105, s1 ; encoding: [0x01,0x0f,0xe9,0xbe] +0x01,0x0f,0xe9,0xbe + +# GFX11: s_sext_i32_i16 vcc_hi, s1 ; encoding: [0x01,0x0f,0xeb,0xbe] +0x01,0x0f,0xeb,0xbe + +# GFX11: s_sext_i32_i16 vcc_lo, s1 ; encoding: [0x01,0x0f,0xea,0xbe] +0x01,0x0f,0xea,0xbe + +# GFX11: s_sext_i32_i8 exec_hi, s1 ; encoding: [0x01,0x0e,0xff,0xbe] +0x01,0x0e,0xff,0xbe + +# GFX11: s_sext_i32_i8 exec_lo, s1 ; encoding: [0x01,0x0e,0xfe,0xbe] +0x01,0x0e,0xfe,0xbe + +# GFX11: s_sext_i32_i8 m0, s1 ; encoding: [0x01,0x0e,0xfd,0xbe] +0x01,0x0e,0xfd,0xbe + +# GFX11: s_sext_i32_i8 s0, 0.5 ; encoding: [0xf0,0x0e,0x80,0xbe] +0xf0,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, 0 ; encoding: [0x80,0x0e,0x80,0xbe] +0x80,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, 0x3f717273 ; encoding: [0xff,0x0e,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x0e,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_sext_i32_i8 s0, 0xaf123456 ; encoding: [0xff,0x0e,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x0e,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_sext_i32_i8 s0, -1 ; encoding: [0xc1,0x0e,0x80,0xbe] +0xc1,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, -4.0 ; encoding: [0xf7,0x0e,0x80,0xbe] +0xf7,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, exec_hi ; encoding: [0x7f,0x0e,0x80,0xbe] +0x7f,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, exec_lo ; encoding: [0x7e,0x0e,0x80,0xbe] +0x7e,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, m0 ; encoding: [0x7d,0x0e,0x80,0xbe] +0x7d,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, s104 ; encoding: [0x68,0x0e,0x80,0xbe] +0x68,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, s1 ; encoding: [0x01,0x0e,0x80,0xbe] +0x01,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s0, vcc_hi ; encoding: [0x6b,0x0e,0x80,0xbe] +0x6b,0x0e,0x80,0xbe + +# 
GFX11: s_sext_i32_i8 s0, vcc_lo ; encoding: [0x6a,0x0e,0x80,0xbe] +0x6a,0x0e,0x80,0xbe + +# GFX11: s_sext_i32_i8 s105, s104 ; encoding: [0x68,0x0e,0xe9,0xbe] +0x68,0x0e,0xe9,0xbe + +# GFX11: s_sext_i32_i8 s105, s1 ; encoding: [0x01,0x0e,0xe9,0xbe] +0x01,0x0e,0xe9,0xbe + +# GFX11: s_sext_i32_i8 vcc_hi, s1 ; encoding: [0x01,0x0e,0xeb,0xbe] +0x01,0x0e,0xeb,0xbe + +# GFX11: s_sext_i32_i8 vcc_lo, s1 ; encoding: [0x01,0x0e,0xea,0xbe] +0x01,0x0e,0xea,0xbe + +# GFX11: s_sleep 0 ; encoding: [0x00,0x00,0x83,0xbf] +0x00,0x00,0x83,0xbf + +# GFX11: s_sleep 0x1234 ; encoding: [0x34,0x12,0x83,0xbf] +0x34,0x12,0x83,0xbf + +# GFX11: s_sleep 0xc1d1 ; encoding: [0xd1,0xc1,0x83,0xbf] +0xd1,0xc1,0x83,0xbf + +# GFX11: s_subb_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x82] +0x01,0x02,0xff,0x82 + +# GFX11: s_subb_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x82] +0x01,0x02,0xfe,0x82 + +# GFX11: s_subb_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x82] +0x01,0x02,0xfd,0x82 + +# GFX11: s_subb_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x82] +0xf0,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x82] +0x80,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x82,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x82,0x73,0x72,0x71,0x3f + +# GFX11: s_subb_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x82,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x82,0x56,0x34,0x12,0xaf + +# GFX11: s_subb_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x82] +0xc1,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x82] +0xf7,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x82] +0x7f,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x82] +0x7e,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x82] +0x7d,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x82] +0x68,0x67,0x80,0x82 + +# GFX11: s_subb_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x82] +0x68,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x82] +0x01,0xf0,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x82] +0x01,0x80,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x82,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x82,0x73,0x72,0x71,0x3f + +# GFX11: s_subb_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x82,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x82,0x56,0x34,0x12,0xaf + +# GFX11: s_subb_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x82] +0x01,0xc1,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x82] +0x01,0xf7,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x82] +0x01,0x7f,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x82] +0x01,0x7e,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x82] +0x01,0x7d,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x82] +0x01,0x67,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x82] +0x01,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x82] +0x01,0x6b,0x80,0x82 + +# GFX11: s_subb_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x82] +0x01,0x6a,0x80,0x82 + +# GFX11: s_subb_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x82] +0x6b,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x82] +0x6a,0x02,0x80,0x82 + +# GFX11: s_subb_u32 s105, 
s104, s103 ; encoding: [0x68,0x67,0xe9,0x82] +0x68,0x67,0xe9,0x82 + +# GFX11: s_subb_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x82] +0x68,0x02,0xe9,0x82 + +# GFX11: s_subb_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x82] +0x01,0x67,0xe9,0x82 + +# GFX11: s_subb_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x82] +0x01,0x02,0xe9,0x82 + +# GFX11: s_subb_u32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x82] +0x01,0x02,0xeb,0x82 + +# GFX11: s_subb_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x82] +0x01,0x02,0xea,0x82 + +# GFX11: s_sub_i32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x81] +0x01,0x02,0xff,0x81 + +# GFX11: s_sub_i32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x81] +0x01,0x02,0xfe,0x81 + +# GFX11: s_sub_i32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x81] +0x01,0x02,0xfd,0x81 + +# GFX11: s_sub_i32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x81] +0xf0,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x81] +0x80,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x81,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x81,0x73,0x72,0x71,0x3f + +# GFX11: s_sub_i32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x81,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x81,0x56,0x34,0x12,0xaf + +# GFX11: s_sub_i32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x81] +0xc1,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x81] +0xf7,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x81] +0x7f,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x81] +0x7e,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x81] +0x7d,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x81] +0x68,0x67,0x80,0x81 + +# GFX11: s_sub_i32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x81] +0x68,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x81] +0x01,0xf0,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x81] +0x01,0x80,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x81,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x81,0x73,0x72,0x71,0x3f + +# GFX11: s_sub_i32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x81,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x81,0x56,0x34,0x12,0xaf + +# GFX11: s_sub_i32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x81] +0x01,0xc1,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x81] +0x01,0xf7,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x81] +0x01,0x7f,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x81] +0x01,0x7e,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x81] +0x01,0x7d,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x81] +0x01,0x67,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x81] +0x01,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x81] +0x01,0x6b,0x80,0x81 + +# GFX11: s_sub_i32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x81] +0x01,0x6a,0x80,0x81 + +# GFX11: s_sub_i32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x81] +0x6b,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x81] +0x6a,0x02,0x80,0x81 + +# GFX11: s_sub_i32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x81] +0x68,0x67,0xe9,0x81 + +# GFX11: s_sub_i32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x81] +0x68,0x02,0xe9,0x81 + +# GFX11: s_sub_i32 s105, s1, s103 
; encoding: [0x01,0x67,0xe9,0x81] +0x01,0x67,0xe9,0x81 + +# GFX11: s_sub_i32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x81] +0x01,0x02,0xe9,0x81 + +# GFX11: s_sub_i32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0xeb,0x81] +0x01,0x02,0xeb,0x81 + +# GFX11: s_sub_i32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x81] +0x01,0x02,0xea,0x81 + +# GFX11: s_sub_u32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0xff,0x80] +0x01,0x02,0xff,0x80 + +# GFX11: s_sub_u32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0xfe,0x80] +0x01,0x02,0xfe,0x80 + +# GFX11: s_sub_u32 m0, s1, s2 ; encoding: [0x01,0x02,0xfd,0x80] +0x01,0x02,0xfd,0x80 + +# GFX11: s_sub_u32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x80,0x80] +0xf0,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, 0, s2 ; encoding: [0x80,0x02,0x80,0x80] +0x80,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x80,0x80,0x73,0x72,0x71,0x3f] +0xff,0x02,0x80,0x80,0x73,0x72,0x71,0x3f + +# GFX11: s_sub_u32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x80,0x80,0x56,0x34,0x12,0xaf] +0xff,0x02,0x80,0x80,0x56,0x34,0x12,0xaf + +# GFX11: s_sub_u32 s0, -1, s2 ; encoding: [0xc1,0x02,0x80,0x80] +0xc1,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x80,0x80] +0xf7,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x80,0x80] +0x7f,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x80,0x80] +0x7e,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, m0, s2 ; encoding: [0x7d,0x02,0x80,0x80] +0x7d,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, s104, s103 ; encoding: [0x68,0x67,0x80,0x80] +0x68,0x67,0x80,0x80 + +# GFX11: s_sub_u32 s0, s104, s2 ; encoding: [0x68,0x02,0x80,0x80] +0x68,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x80,0x80] +0x01,0xf0,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, 0 ; encoding: [0x01,0x80,0x80,0x80] +0x01,0x80,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x80,0x80,0x73,0x72,0x71,0x3f] +0x01,0xff,0x80,0x80,0x73,0x72,0x71,0x3f + +# GFX11: s_sub_u32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x80,0x80,0x56,0x34,0x12,0xaf] +0x01,0xff,0x80,0x80,0x56,0x34,0x12,0xaf + +# GFX11: s_sub_u32 s0, s1, -1 ; encoding: [0x01,0xc1,0x80,0x80] +0x01,0xc1,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x80,0x80] +0x01,0xf7,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x80,0x80] +0x01,0x7f,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x80,0x80] +0x01,0x7e,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, m0 ; encoding: [0x01,0x7d,0x80,0x80] +0x01,0x7d,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, s103 ; encoding: [0x01,0x67,0x80,0x80] +0x01,0x67,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, s2 ; encoding: [0x01,0x02,0x80,0x80] +0x01,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x80,0x80] +0x01,0x6b,0x80,0x80 + +# GFX11: s_sub_u32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x80,0x80] +0x01,0x6a,0x80,0x80 + +# GFX11: s_sub_u32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x80,0x80] +0x6b,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x80,0x80] +0x6a,0x02,0x80,0x80 + +# GFX11: s_sub_u32 s105, s104, s103 ; encoding: [0x68,0x67,0xe9,0x80] +0x68,0x67,0xe9,0x80 + +# GFX11: s_sub_u32 s105, s104, s2 ; encoding: [0x68,0x02,0xe9,0x80] +0x68,0x02,0xe9,0x80 + +# GFX11: s_sub_u32 s105, s1, s103 ; encoding: [0x01,0x67,0xe9,0x80] +0x01,0x67,0xe9,0x80 + +# GFX11: s_sub_u32 s105, s1, s2 ; encoding: [0x01,0x02,0xe9,0x80] +0x01,0x02,0xe9,0x80 + +# GFX11: s_sub_u32 vcc_hi, s1, s2 ; encoding: 
[0x01,0x02,0xeb,0x80] +0x01,0x02,0xeb,0x80 + +# GFX11: s_sub_u32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0xea,0x80] +0x01,0x02,0xea,0x80 + +# GFX11: s_subvector_loop_begin exec_hi, 4660 ; encoding: [0x34,0x12,0x7f,0xbb] +0x34,0x12,0x7f,0xbb + +# GFX11: s_subvector_loop_begin exec_lo, 4660 ; encoding: [0x34,0x12,0x7e,0xbb] +0x34,0x12,0x7e,0xbb + +# GFX11: s_subvector_loop_begin m0, 4660 ; encoding: [0x34,0x12,0x7d,0xbb] +0x34,0x12,0x7d,0xbb + +# GFX11: s_subvector_loop_begin s0, 4660 ; encoding: [0x34,0x12,0x00,0xbb] +0x34,0x12,0x00,0xbb + +# GFX11: s_subvector_loop_begin s105, 4660 ; encoding: [0x34,0x12,0x69,0xbb] +0x34,0x12,0x69,0xbb + +# GFX11: s_subvector_loop_begin vcc_hi, 4660 ; encoding: [0x34,0x12,0x6b,0xbb] +0x34,0x12,0x6b,0xbb + +# GFX11: s_subvector_loop_begin vcc_lo, 4660 ; encoding: [0x34,0x12,0x6a,0xbb] +0x34,0x12,0x6a,0xbb + +# GFX11: s_subvector_loop_end exec_hi, 4660 ; encoding: [0x34,0x12,0xff,0xbb] +0x34,0x12,0xff,0xbb + +# GFX11: s_subvector_loop_end exec_lo, 4660 ; encoding: [0x34,0x12,0xfe,0xbb] +0x34,0x12,0xfe,0xbb + +# GFX11: s_subvector_loop_end m0, 4660 ; encoding: [0x34,0x12,0xfd,0xbb] +0x34,0x12,0xfd,0xbb + +# GFX11: s_subvector_loop_end s0, 4660 ; encoding: [0x34,0x12,0x80,0xbb] +0x34,0x12,0x80,0xbb + +# GFX11: s_subvector_loop_end s105, 4660 ; encoding: [0x34,0x12,0xe9,0xbb] +0x34,0x12,0xe9,0xbb + +# GFX11: s_subvector_loop_end vcc_hi, 4660 ; encoding: [0x34,0x12,0xeb,0xbb] +0x34,0x12,0xeb,0xbb + +# GFX11: s_subvector_loop_end vcc_lo, 4660 ; encoding: [0x34,0x12,0xea,0xbb] +0x34,0x12,0xea,0xbb + +# GFX11: s_swappc_b64 s[0:1], s[102:103] ; encoding: [0x66,0x49,0x80,0xbe] +0x66,0x49,0x80,0xbe + +# GFX11: s_swappc_b64 s[0:1], s[2:3] ; encoding: [0x02,0x49,0x80,0xbe] +0x02,0x49,0x80,0xbe + +# GFX11: s_swappc_b64 s[0:1], vcc ; encoding: [0x6a,0x49,0x80,0xbe] +0x6a,0x49,0x80,0xbe + +# GFX11: s_swappc_b64 s[104:105], s[102:103] ; encoding: [0x66,0x49,0xe8,0xbe] +0x66,0x49,0xe8,0xbe + +# GFX11: s_swappc_b64 s[104:105], s[2:3] ; encoding: [0x02,0x49,0xe8,0xbe] +0x02,0x49,0xe8,0xbe + +# GFX11: s_swappc_b64 vcc, s[2:3] ; encoding: [0x02,0x49,0xea,0xbe] +0x02,0x49,0xea,0xbe + +# GFX11: s_trap 0 ; encoding: [0x00,0x00,0x90,0xbf] +0x00,0x00,0x90,0xbf + +# GFX11: s_trap 0x1234 ; encoding: [0x34,0x12,0x90,0xbf] +0x34,0x12,0x90,0xbf + +# GFX11: s_trap 0xc1d1 ; encoding: [0xd1,0xc1,0x90,0xbf] +0xd1,0xc1,0x90,0xbf + +# GFX11: s_ttracedata ; encoding: [0x00,0x00,0xba,0xbf] +0x00,0x00,0xba,0xbf + +# GFX11: s_ttracedata_imm 0x0 ; encoding: [0x00,0x00,0xbb,0xbf] +0x00,0x00,0xbb,0xbf + +# GFX11: s_ttracedata_imm 0x1234 ; encoding: [0x34,0x12,0xbb,0xbf] +0x34,0x12,0xbb,0xbf + +# GFX11: s_ttracedata_imm 0xc1d1 ; encoding: [0xd1,0xc1,0xbb,0xbf] +0xd1,0xc1,0xbb,0xbf + +# GFX11: s_version 0x1234 ; encoding: [0x34,0x12,0x80,0xb0] +0x34,0x12,0x80,0xb0 + +# GFX11: s_version 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xb0] +0xd1,0xc1,0x80,0xb0 + +# GFX11: s_waitcnt_depctr depctr_hold_cnt(0) depctr_sa_sdst(0) depctr_va_vdst(0) depctr_va_sdst(0) depctr_va_ssrc(0) depctr_va_vcc(0) depctr_vm_vsrc(0) ; encoding: [0x00,0x00,0x88,0xbf] +0x00,0x00,0x88,0xbf + +# GFX11: s_waitcnt_depctr 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf] +0xfe,0xff,0x88,0xbf + +# GFX11: s_waitcnt_expcnt exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xbd] +0x34,0x12,0x7f,0xbd + +# GFX11: s_waitcnt_expcnt exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xbd] +0x34,0x12,0x7e,0xbd + +# GFX11: s_waitcnt_expcnt m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xbd] +0x34,0x12,0x7d,0xbd + +# GFX11: s_waitcnt_expcnt s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xbd] 
+0x34,0x12,0x00,0xbd + +# GFX11: s_waitcnt_expcnt s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xbd] +0xd1,0xc1,0x00,0xbd + +# GFX11: s_waitcnt_expcnt s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xbd] +0x34,0x12,0x69,0xbd + +# GFX11: s_waitcnt_expcnt vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xbd] +0x34,0x12,0x6b,0xbd + +# GFX11: s_waitcnt_expcnt vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xbd] +0x34,0x12,0x6a,0xbd + +# GFX11: s_waitcnt_lgkmcnt exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xbd] +0x34,0x12,0xff,0xbd + +# GFX11: s_waitcnt_lgkmcnt exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xbd] +0x34,0x12,0xfe,0xbd + +# GFX11: s_waitcnt_lgkmcnt m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xbd] +0x34,0x12,0xfd,0xbd + +# GFX11: s_waitcnt_lgkmcnt s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xbd] +0x34,0x12,0x80,0xbd + +# GFX11: s_waitcnt_lgkmcnt s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xbd] +0xd1,0xc1,0x80,0xbd + +# GFX11: s_waitcnt_lgkmcnt s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xbd] +0x34,0x12,0xe9,0xbd + +# GFX11: s_waitcnt_lgkmcnt vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xbd] +0x34,0x12,0xeb,0xbd + +# GFX11: s_waitcnt_lgkmcnt vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xbd] +0x34,0x12,0xea,0xbd + +# GFX11: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] +0x00,0x00,0x89,0xbf + +# GFX11: s_waitcnt vmcnt(1) expcnt(5) lgkmcnt(1) ; encoding: [0x15,0x04,0x89,0xbf] +0x15,0x04,0x89,0xbf + +# GFX11: s_waitcnt vmcnt(4) expcnt(3) lgkmcnt(2) ; encoding: [0x23,0x10,0x89,0xbf] +0x23,0x10,0x89,0xbf + +# GFX11: s_waitcnt_vmcnt exec_hi, 0x1234 ; encoding: [0x34,0x12,0xff,0xbc] +0x34,0x12,0xff,0xbc + +# GFX11: s_waitcnt_vmcnt exec_lo, 0x1234 ; encoding: [0x34,0x12,0xfe,0xbc] +0x34,0x12,0xfe,0xbc + +# GFX11: s_waitcnt_vmcnt m0, 0x1234 ; encoding: [0x34,0x12,0xfd,0xbc] +0x34,0x12,0xfd,0xbc + +# GFX11: s_waitcnt_vmcnt s0, 0x1234 ; encoding: [0x34,0x12,0x80,0xbc] +0x34,0x12,0x80,0xbc + +# GFX11: s_waitcnt_vmcnt s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x80,0xbc] +0xd1,0xc1,0x80,0xbc + +# GFX11: s_waitcnt_vmcnt s105, 0x1234 ; encoding: [0x34,0x12,0xe9,0xbc] +0x34,0x12,0xe9,0xbc + +# GFX11: s_waitcnt_vmcnt vcc_hi, 0x1234 ; encoding: [0x34,0x12,0xeb,0xbc] +0x34,0x12,0xeb,0xbc + +# GFX11: s_waitcnt_vmcnt vcc_lo, 0x1234 ; encoding: [0x34,0x12,0xea,0xbc] +0x34,0x12,0xea,0xbc + +# GFX11: s_waitcnt_vscnt exec_hi, 0x1234 ; encoding: [0x34,0x12,0x7f,0xbc] +0x34,0x12,0x7f,0xbc + +# GFX11: s_waitcnt_vscnt exec_lo, 0x1234 ; encoding: [0x34,0x12,0x7e,0xbc] +0x34,0x12,0x7e,0xbc + +# GFX11: s_waitcnt_vscnt m0, 0x1234 ; encoding: [0x34,0x12,0x7d,0xbc] +0x34,0x12,0x7d,0xbc + +# GFX11: s_waitcnt_vscnt s0, 0x1234 ; encoding: [0x34,0x12,0x00,0xbc] +0x34,0x12,0x00,0xbc + +# GFX11: s_waitcnt_vscnt s0, 0xc1d1 ; encoding: [0xd1,0xc1,0x00,0xbc] +0xd1,0xc1,0x00,0xbc + +# GFX11: s_waitcnt_vscnt s105, 0x1234 ; encoding: [0x34,0x12,0x69,0xbc] +0x34,0x12,0x69,0xbc + +# GFX11: s_waitcnt_vscnt vcc_hi, 0x1234 ; encoding: [0x34,0x12,0x6b,0xbc] +0x34,0x12,0x6b,0xbc + +# GFX11: s_waitcnt_vscnt vcc_lo, 0x1234 ; encoding: [0x34,0x12,0x6a,0xbc] +0x34,0x12,0x6a,0xbc + +# GFX11: s_wait_idle ; encoding: [0x00,0x00,0x8a,0xbf] +0x00,0x00,0x8a,0xbf + +# GFX11: s_wakeup ; encoding: [0x00,0x00,0xb4,0xbf] +0x00,0x00,0xb4,0xbf + +# GFX11: s_wqm_b32 exec_hi, s1 ; encoding: [0x01,0x1c,0xff,0xbe] +0x01,0x1c,0xff,0xbe + +# GFX11: s_wqm_b32 exec_lo, s1 ; encoding: [0x01,0x1c,0xfe,0xbe] +0x01,0x1c,0xfe,0xbe + +# GFX11: s_wqm_b32 m0, s1 ; encoding: [0x01,0x1c,0xfd,0xbe] +0x01,0x1c,0xfd,0xbe + +# GFX11: s_wqm_b32 s0, 0.5 ; encoding: [0xf0,0x1c,0x80,0xbe] 
+0xf0,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, 0 ; encoding: [0x80,0x1c,0x80,0xbe] +0x80,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, 0x3f717273 ; encoding: [0xff,0x1c,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1c,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_wqm_b32 s0, 0xaf123456 ; encoding: [0xff,0x1c,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1c,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_wqm_b32 s0, -1 ; encoding: [0xc1,0x1c,0x80,0xbe] +0xc1,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, -4.0 ; encoding: [0xf7,0x1c,0x80,0xbe] +0xf7,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, exec_hi ; encoding: [0x7f,0x1c,0x80,0xbe] +0x7f,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, exec_lo ; encoding: [0x7e,0x1c,0x80,0xbe] +0x7e,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, m0 ; encoding: [0x7d,0x1c,0x80,0xbe] +0x7d,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, s104 ; encoding: [0x68,0x1c,0x80,0xbe] +0x68,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, s1 ; encoding: [0x01,0x1c,0x80,0xbe] +0x01,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, vcc_hi ; encoding: [0x6b,0x1c,0x80,0xbe] +0x6b,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s0, vcc_lo ; encoding: [0x6a,0x1c,0x80,0xbe] +0x6a,0x1c,0x80,0xbe + +# GFX11: s_wqm_b32 s105, s104 ; encoding: [0x68,0x1c,0xe9,0xbe] +0x68,0x1c,0xe9,0xbe + +# GFX11: s_wqm_b32 s105, s1 ; encoding: [0x01,0x1c,0xe9,0xbe] +0x01,0x1c,0xe9,0xbe + +# GFX11: s_wqm_b32 vcc_hi, s1 ; encoding: [0x01,0x1c,0xeb,0xbe] +0x01,0x1c,0xeb,0xbe + +# GFX11: s_wqm_b32 vcc_lo, s1 ; encoding: [0x01,0x1c,0xea,0xbe] +0x01,0x1c,0xea,0xbe + +# GFX11: s_wqm_b64 exec, s[2:3] ; encoding: [0x02,0x1d,0xfe,0xbe] +0x02,0x1d,0xfe,0xbe + +# GFX11: s_wqm_b64 s[0:1], 0.5 ; encoding: [0xf0,0x1d,0x80,0xbe] +0xf0,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], 0 ; encoding: [0x80,0x1d,0x80,0xbe] +0x80,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x1d,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x1d,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_wqm_b64 s[0:1], -1 ; encoding: [0xc1,0x1d,0x80,0xbe] +0xc1,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], -4.0 ; encoding: [0xf7,0x1d,0x80,0xbe] +0xf7,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], exec ; encoding: [0x7e,0x1d,0x80,0xbe] +0x7e,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], s[102:103] ; encoding: [0x66,0x1d,0x80,0xbe] +0x66,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], s[2:3] ; encoding: [0x02,0x1d,0x80,0xbe] +0x02,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[0:1], vcc ; encoding: [0x6a,0x1d,0x80,0xbe] +0x6a,0x1d,0x80,0xbe + +# GFX11: s_wqm_b64 s[104:105], s[102:103] ; encoding: [0x66,0x1d,0xe8,0xbe] +0x66,0x1d,0xe8,0xbe + +# GFX11: s_wqm_b64 s[104:105], s[2:3] ; encoding: [0x02,0x1d,0xe8,0xbe] +0x02,0x1d,0xe8,0xbe + +# GFX11: s_wqm_b64 vcc, s[2:3] ; encoding: [0x02,0x1d,0xea,0xbe] +0x02,0x1d,0xea,0xbe + +# GFX11: s_xnor_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x90] +0x01,0x02,0x7f,0x90 + +# GFX11: s_xnor_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x90] +0x01,0x02,0x7e,0x90 + +# GFX11: s_xnor_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x90] +0x01,0x02,0x7d,0x90 + +# GFX11: s_xnor_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x90] +0xf0,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x90] +0x80,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x90,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x90,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x90,0x56,0x34,0x12,0xaf] 
+0xff,0x02,0x00,0x90,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x90] +0xc1,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x90] +0xf7,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x90] +0x7f,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x90] +0x7e,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x90] +0x7d,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x90] +0x68,0x67,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x90] +0x68,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x90] +0x01,0xf0,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x90] +0x01,0x80,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x90,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x90,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x90,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x90,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x90] +0x01,0xc1,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x90] +0x01,0xf7,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x90] +0x01,0x7f,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x90] +0x01,0x7e,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x90] +0x01,0x7d,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x90] +0x01,0x67,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x90] +0x01,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x90] +0x01,0x6b,0x00,0x90 + +# GFX11: s_xnor_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x90] +0x01,0x6a,0x00,0x90 + +# GFX11: s_xnor_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x90] +0x6b,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x90] +0x6a,0x02,0x00,0x90 + +# GFX11: s_xnor_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x90] +0x68,0x67,0x69,0x90 + +# GFX11: s_xnor_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x90] +0x68,0x02,0x69,0x90 + +# GFX11: s_xnor_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x90] +0x01,0x67,0x69,0x90 + +# GFX11: s_xnor_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x90] +0x01,0x02,0x69,0x90 + +# GFX11: s_xnor_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x90] +0x01,0x02,0x6b,0x90 + +# GFX11: s_xnor_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x90] +0x01,0x02,0x6a,0x90 + +# GFX11: s_xnor_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x90] +0x02,0x04,0xfe,0x90 + +# GFX11: s_xnor_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x90] +0xf0,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x90] +0x80,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x90,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x90] +0xc1,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x90] +0xf7,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], exec, s[4:5] ; encoding: 
[0x7e,0x04,0x80,0x90] +0x7e,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x90] +0x66,0x64,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x90] +0x66,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x90] +0x02,0xf0,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x90] +0x02,0x80,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x90,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x90,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x90] +0x02,0xc1,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x90] +0x02,0xf7,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x90] +0x02,0x7e,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x90] +0x02,0x64,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x90] +0x02,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x90] +0x02,0x6a,0x80,0x90 + +# GFX11: s_xnor_b64 s[0:1], vcc, s[4:5] ; encoding: [0x6a,0x04,0x80,0x90] +0x6a,0x04,0x80,0x90 + +# GFX11: s_xnor_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x90] +0x66,0x64,0xe8,0x90 + +# GFX11: s_xnor_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x90] +0x66,0x04,0xe8,0x90 + +# GFX11: s_xnor_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x90] +0x02,0x64,0xe8,0x90 + +# GFX11: s_xnor_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x90] +0x02,0x04,0xe8,0x90 + +# GFX11: s_xnor_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x90] +0x02,0x04,0xea,0x90 + +# GFX11: s_xnor_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x2a,0x80,0xbe] +0xf0,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, 0 ; encoding: [0x80,0x2a,0x80,0xbe] +0x80,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x2a,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2a,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x2a,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2a,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_saveexec_b32 s0, -1 ; encoding: [0xc1,0x2a,0x80,0xbe] +0xc1,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x2a,0x80,0xbe] +0xf7,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x2a,0x80,0xbe] +0x7f,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x2a,0x80,0xbe] +0x7e,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, m0 ; encoding: [0x7d,0x2a,0x80,0xbe] +0x7d,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, s104 ; encoding: [0x68,0x2a,0x80,0xbe] +0x68,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, s1 ; encoding: [0x01,0x2a,0x80,0xbe] +0x01,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x2a,0x80,0xbe] +0x6b,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x2a,0x80,0xbe] +0x6a,0x2a,0x80,0xbe + +# GFX11: s_xnor_saveexec_b32 s105, s104 ; encoding: [0x68,0x2a,0xe9,0xbe] +0x68,0x2a,0xe9,0xbe + +# GFX11: s_xnor_saveexec_b32 s105, s1 ; encoding: [0x01,0x2a,0xe9,0xbe] +0x01,0x2a,0xe9,0xbe + +# GFX11: s_xnor_saveexec_b32 vcc_hi, s1 ; 
encoding: [0x01,0x2a,0xeb,0xbe] +0x01,0x2a,0xeb,0xbe + +# GFX11: s_xnor_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x2a,0xea,0xbe] +0x01,0x2a,0xea,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x2b,0x80,0xbe] +0xf0,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x2b,0x80,0xbe] +0x80,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x2b,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x2b,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_xnor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2b,0x80,0xbe] +0xc1,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x2b,0x80,0xbe] +0xf7,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x2b,0x80,0xbe] +0x7e,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x2b,0x80,0xbe] +0x66,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2b,0x80,0xbe] +0x02,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x2b,0x80,0xbe] +0x6a,0x2b,0x80,0xbe + +# GFX11: s_xnor_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x2b,0xe8,0xbe] +0x66,0x2b,0xe8,0xbe + +# GFX11: s_xnor_saveexec_b64 s[104:105], s[2:3] ; encoding: [0x02,0x2b,0xe8,0xbe] +0x02,0x2b,0xe8,0xbe + +# GFX11: s_xnor_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x2b,0xea,0xbe] +0x02,0x2b,0xea,0xbe + +# GFX11: s_xor_b32 exec_hi, s1, s2 ; encoding: [0x01,0x02,0x7f,0x8d] +0x01,0x02,0x7f,0x8d + +# GFX11: s_xor_b32 exec_lo, s1, s2 ; encoding: [0x01,0x02,0x7e,0x8d] +0x01,0x02,0x7e,0x8d + +# GFX11: s_xor_b32 m0, s1, s2 ; encoding: [0x01,0x02,0x7d,0x8d] +0x01,0x02,0x7d,0x8d + +# GFX11: s_xor_b32 s0, 0.5, s2 ; encoding: [0xf0,0x02,0x00,0x8d] +0xf0,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, 0, s2 ; encoding: [0x80,0x02,0x00,0x8d] +0x80,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, 0x3f717273, s2 ; encoding: [0xff,0x02,0x00,0x8d,0x73,0x72,0x71,0x3f] +0xff,0x02,0x00,0x8d,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_b32 s0, 0xaf123456, s2 ; encoding: [0xff,0x02,0x00,0x8d,0x56,0x34,0x12,0xaf] +0xff,0x02,0x00,0x8d,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_b32 s0, -1, s2 ; encoding: [0xc1,0x02,0x00,0x8d] +0xc1,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, -4.0, s2 ; encoding: [0xf7,0x02,0x00,0x8d] +0xf7,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, exec_hi, s2 ; encoding: [0x7f,0x02,0x00,0x8d] +0x7f,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, exec_lo, s2 ; encoding: [0x7e,0x02,0x00,0x8d] +0x7e,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, m0, s2 ; encoding: [0x7d,0x02,0x00,0x8d] +0x7d,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, s104, s103 ; encoding: [0x68,0x67,0x00,0x8d] +0x68,0x67,0x00,0x8d + +# GFX11: s_xor_b32 s0, s104, s2 ; encoding: [0x68,0x02,0x00,0x8d] +0x68,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, 0.5 ; encoding: [0x01,0xf0,0x00,0x8d] +0x01,0xf0,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, 0 ; encoding: [0x01,0x80,0x00,0x8d] +0x01,0x80,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, 0x3f717273 ; encoding: [0x01,0xff,0x00,0x8d,0x73,0x72,0x71,0x3f] +0x01,0xff,0x00,0x8d,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_b32 s0, s1, 0xaf123456 ; encoding: [0x01,0xff,0x00,0x8d,0x56,0x34,0x12,0xaf] +0x01,0xff,0x00,0x8d,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_b32 s0, s1, -1 ; encoding: [0x01,0xc1,0x00,0x8d] +0x01,0xc1,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, -4.0 ; encoding: [0x01,0xf7,0x00,0x8d] +0x01,0xf7,0x00,0x8d + +# GFX11: 
s_xor_b32 s0, s1, exec_hi ; encoding: [0x01,0x7f,0x00,0x8d] +0x01,0x7f,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, exec_lo ; encoding: [0x01,0x7e,0x00,0x8d] +0x01,0x7e,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, m0 ; encoding: [0x01,0x7d,0x00,0x8d] +0x01,0x7d,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, s103 ; encoding: [0x01,0x67,0x00,0x8d] +0x01,0x67,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8d] +0x01,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, vcc_hi ; encoding: [0x01,0x6b,0x00,0x8d] +0x01,0x6b,0x00,0x8d + +# GFX11: s_xor_b32 s0, s1, vcc_lo ; encoding: [0x01,0x6a,0x00,0x8d] +0x01,0x6a,0x00,0x8d + +# GFX11: s_xor_b32 s0, vcc_hi, s2 ; encoding: [0x6b,0x02,0x00,0x8d] +0x6b,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s0, vcc_lo, s2 ; encoding: [0x6a,0x02,0x00,0x8d] +0x6a,0x02,0x00,0x8d + +# GFX11: s_xor_b32 s105, s104, s103 ; encoding: [0x68,0x67,0x69,0x8d] +0x68,0x67,0x69,0x8d + +# GFX11: s_xor_b32 s105, s104, s2 ; encoding: [0x68,0x02,0x69,0x8d] +0x68,0x02,0x69,0x8d + +# GFX11: s_xor_b32 s105, s1, s103 ; encoding: [0x01,0x67,0x69,0x8d] +0x01,0x67,0x69,0x8d + +# GFX11: s_xor_b32 s105, s1, s2 ; encoding: [0x01,0x02,0x69,0x8d] +0x01,0x02,0x69,0x8d + +# GFX11: s_xor_b32 vcc_hi, s1, s2 ; encoding: [0x01,0x02,0x6b,0x8d] +0x01,0x02,0x6b,0x8d + +# GFX11: s_xor_b32 vcc_lo, s1, s2 ; encoding: [0x01,0x02,0x6a,0x8d] +0x01,0x02,0x6a,0x8d + +# GFX11: s_xor_b64 exec, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xfe,0x8d] +0x02,0x04,0xfe,0x8d + +# GFX11: s_xor_b64 s[0:1], 0.5, s[4:5] ; encoding: [0xf0,0x04,0x80,0x8d] +0xf0,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], 0, s[4:5] ; encoding: [0x80,0x04,0x80,0x8d] +0x80,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], 0x3f717273, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x8d,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8d] +0xc1,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], -4.0, s[4:5] ; encoding: [0xf7,0x04,0x80,0x8d] +0xf7,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], exec, s[4:5] ; encoding: [0x7e,0x04,0x80,0x8d] +0x7e,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[102:103], s[100:101] ; encoding: [0x66,0x64,0x80,0x8d] +0x66,0x64,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[102:103], s[4:5] ; encoding: [0x66,0x04,0x80,0x8d] +0x66,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], 0.5 ; encoding: [0x02,0xf0,0x80,0x8d] +0x02,0xf0,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], 0 ; encoding: [0x02,0x80,0x80,0x8d] +0x02,0x80,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], 0x3f717273 ; encoding: [0x02,0xff,0x80,0x8d,0x73,0x72,0x71,0x3f] +0x02,0xff,0x80,0x8d,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf] +0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8d] +0x02,0xc1,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], -4.0 ; encoding: [0x02,0xf7,0x80,0x8d] +0x02,0xf7,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8d] +0x02,0x7e,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], s[100:101] ; encoding: [0x02,0x64,0x80,0x8d] +0x02,0x64,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0x8d] +0x02,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], s[2:3], vcc ; encoding: [0x02,0x6a,0x80,0x8d] +0x02,0x6a,0x80,0x8d + +# GFX11: s_xor_b64 s[0:1], vcc, 
s[4:5] ; encoding: [0x6a,0x04,0x80,0x8d] +0x6a,0x04,0x80,0x8d + +# GFX11: s_xor_b64 s[104:105], s[102:103], s[100:101] ; encoding: [0x66,0x64,0xe8,0x8d] +0x66,0x64,0xe8,0x8d + +# GFX11: s_xor_b64 s[104:105], s[102:103], s[4:5] ; encoding: [0x66,0x04,0xe8,0x8d] +0x66,0x04,0xe8,0x8d + +# GFX11: s_xor_b64 s[104:105], s[2:3], s[100:101] ; encoding: [0x02,0x64,0xe8,0x8d] +0x02,0x64,0xe8,0x8d + +# GFX11: s_xor_b64 s[104:105], s[2:3], s[4:5] ; encoding: [0x02,0x04,0xe8,0x8d] +0x02,0x04,0xe8,0x8d + +# GFX11: s_xor_b64 vcc, s[2:3], s[4:5] ; encoding: [0x02,0x04,0xea,0x8d] +0x02,0x04,0xea,0x8d + +# GFX11: s_xor_saveexec_b32 s0, 0.5 ; encoding: [0xf0,0x24,0x80,0xbe] +0xf0,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, 0 ; encoding: [0x80,0x24,0x80,0xbe] +0x80,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, 0x3f717273 ; encoding: [0xff,0x24,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x24,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_saveexec_b32 s0, 0xaf123456 ; encoding: [0xff,0x24,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x24,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_saveexec_b32 s0, -1 ; encoding: [0xc1,0x24,0x80,0xbe] +0xc1,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, -4.0 ; encoding: [0xf7,0x24,0x80,0xbe] +0xf7,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, exec_hi ; encoding: [0x7f,0x24,0x80,0xbe] +0x7f,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, exec_lo ; encoding: [0x7e,0x24,0x80,0xbe] +0x7e,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, m0 ; encoding: [0x7d,0x24,0x80,0xbe] +0x7d,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, s104 ; encoding: [0x68,0x24,0x80,0xbe] +0x68,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, s1 ; encoding: [0x01,0x24,0x80,0xbe] +0x01,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, vcc_hi ; encoding: [0x6b,0x24,0x80,0xbe] +0x6b,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s0, vcc_lo ; encoding: [0x6a,0x24,0x80,0xbe] +0x6a,0x24,0x80,0xbe + +# GFX11: s_xor_saveexec_b32 s105, s104 ; encoding: [0x68,0x24,0xe9,0xbe] +0x68,0x24,0xe9,0xbe + +# GFX11: s_xor_saveexec_b32 s105, s1 ; encoding: [0x01,0x24,0xe9,0xbe] +0x01,0x24,0xe9,0xbe + +# GFX11: s_xor_saveexec_b32 vcc_hi, s1 ; encoding: [0x01,0x24,0xeb,0xbe] +0x01,0x24,0xeb,0xbe + +# GFX11: s_xor_saveexec_b32 vcc_lo, s1 ; encoding: [0x01,0x24,0xea,0xbe] +0x01,0x24,0xea,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], 0.5 ; encoding: [0xf0,0x25,0x80,0xbe] +0xf0,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], 0 ; encoding: [0x80,0x25,0x80,0xbe] +0x80,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], 0x3f717273 ; encoding: [0xff,0x25,0x80,0xbe,0x73,0x72,0x71,0x3f] +0xff,0x25,0x80,0xbe,0x73,0x72,0x71,0x3f + +# GFX11: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf] +0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf + +# GFX11: s_xor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x25,0x80,0xbe] +0xc1,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], -4.0 ; encoding: [0xf7,0x25,0x80,0xbe] +0xf7,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], exec ; encoding: [0x7e,0x25,0x80,0xbe] +0x7e,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], s[102:103] ; encoding: [0x66,0x25,0x80,0xbe] +0x66,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x25,0x80,0xbe] +0x02,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[0:1], vcc ; encoding: [0x6a,0x25,0x80,0xbe] +0x6a,0x25,0x80,0xbe + +# GFX11: s_xor_saveexec_b64 s[104:105], s[102:103] ; encoding: [0x66,0x25,0xe8,0xbe] +0x66,0x25,0xe8,0xbe + +# GFX11: s_xor_saveexec_b64 s[104:105], s[2:3] ; 
encoding: [0x02,0x25,0xe8,0xbe] +0x02,0x25,0xe8,0xbe + +# GFX11: s_xor_saveexec_b64 vcc, s[2:3] ; encoding: [0x02,0x25,0xea,0xbe] +0x02,0x25,0xea,0xbe + +# GFX11: lds_direct_load v10 wait_vdst:6 ; encoding: [0x0a,0x00,0x16,0xce] +0x0a,0x00,0x16,0xce + +# GFX11: lds_direct_load v11 wait_vdst:5 ; encoding: [0x0b,0x00,0x15,0xce] +0x0b,0x00,0x15,0xce + +# GFX11: lds_direct_load v12 wait_vdst:4 ; encoding: [0x0c,0x00,0x14,0xce] +0x0c,0x00,0x14,0xce + +# GFX11: lds_direct_load v13 wait_vdst:3 ; encoding: [0x0d,0x00,0x13,0xce] +0x0d,0x00,0x13,0xce + +# GFX11: lds_direct_load v14 wait_vdst:2 ; encoding: [0x0e,0x00,0x12,0xce] +0x0e,0x00,0x12,0xce + +# GFX11: lds_direct_load v15 wait_vdst:1 ; encoding: [0x0f,0x00,0x11,0xce] +0x0f,0x00,0x11,0xce + +# GFX11: lds_direct_load v16 ; encoding: [0x10,0x00,0x10,0xce] +0x10,0x00,0x10,0xce + +# GFX11: lds_direct_load v17 ; encoding: [0x11,0x00,0x10,0xce] +0x11,0x00,0x10,0xce + +# GFX11: lds_direct_load v1 wait_vdst:15 ; encoding: [0x01,0x00,0x1f,0xce] +0x01,0x00,0x1f,0xce + +# GFX11: lds_direct_load v2 wait_vdst:14 ; encoding: [0x02,0x00,0x1e,0xce] +0x02,0x00,0x1e,0xce + +# GFX11: lds_direct_load v3 wait_vdst:13 ; encoding: [0x03,0x00,0x1d,0xce] +0x03,0x00,0x1d,0xce + +# GFX11: lds_direct_load v4 wait_vdst:12 ; encoding: [0x04,0x00,0x1c,0xce] +0x04,0x00,0x1c,0xce + +# GFX11: lds_direct_load v5 wait_vdst:11 ; encoding: [0x05,0x00,0x1b,0xce] +0x05,0x00,0x1b,0xce + +# GFX11: lds_direct_load v6 wait_vdst:10 ; encoding: [0x06,0x00,0x1a,0xce] +0x06,0x00,0x1a,0xce + +# GFX11: lds_direct_load v7 wait_vdst:9 ; encoding: [0x07,0x00,0x19,0xce] +0x07,0x00,0x19,0xce + +# GFX11: lds_direct_load v8 wait_vdst:8 ; encoding: [0x08,0x00,0x18,0xce] +0x08,0x00,0x18,0xce + +# GFX11: lds_direct_load v9 wait_vdst:7 ; encoding: [0x09,0x00,0x17,0xce] +0x09,0x00,0x17,0xce + +# GFX11: lds_param_load v10, attr11.x wait_vdst:6 ; encoding: [0x0a,0x2c,0x06,0xce] +0x0a,0x2c,0x06,0xce + +# GFX11: lds_param_load v11, attr22.y wait_vdst:5 ; encoding: [0x0b,0x59,0x05,0xce] +0x0b,0x59,0x05,0xce + +# GFX11: lds_param_load v12, attr33.z wait_vdst:4 ; encoding: [0x0c,0x86,0x04,0xce] +0x0c,0x86,0x04,0xce + +# GFX11: lds_param_load v13, attr63.x wait_vdst:3 ; encoding: [0x0d,0xfc,0x03,0xce] +0x0d,0xfc,0x03,0xce + +# GFX11: lds_param_load v14, attr63.y wait_vdst:2 ; encoding: [0x0e,0xfd,0x02,0xce] +0x0e,0xfd,0x02,0xce + +# GFX11: lds_param_load v15, attr63.z wait_vdst:1 ; encoding: [0x0f,0xfe,0x01,0xce] +0x0f,0xfe,0x01,0xce + +# GFX11: lds_param_load v16, attr63.w ; encoding: [0x10,0xff,0x00,0xce] +0x10,0xff,0x00,0xce + +# GFX11: lds_param_load v17, attr63.w ; encoding: [0x11,0xff,0x00,0xce] +0x11,0xff,0x00,0xce + +# GFX11: lds_param_load v1, attr0.x wait_vdst:15 ; encoding: [0x01,0x00,0x0f,0xce] +0x01,0x00,0x0f,0xce + +# GFX11: lds_param_load v2, attr0.y wait_vdst:14 ; encoding: [0x02,0x01,0x0e,0xce] +0x02,0x01,0x0e,0xce + +# GFX11: lds_param_load v3, attr0.z wait_vdst:13 ; encoding: [0x03,0x02,0x0d,0xce] +0x03,0x02,0x0d,0xce + +# GFX11: lds_param_load v4, attr0.w wait_vdst:12 ; encoding: [0x04,0x03,0x0c,0xce] +0x04,0x03,0x0c,0xce + +# GFX11: lds_param_load v5, attr0.x wait_vdst:11 ; encoding: [0x05,0x00,0x0b,0xce] +0x05,0x00,0x0b,0xce + +# GFX11: lds_param_load v6, attr1.x wait_vdst:10 ; encoding: [0x06,0x04,0x0a,0xce] +0x06,0x04,0x0a,0xce + +# GFX11: lds_param_load v7, attr2.y wait_vdst:9 ; encoding: [0x07,0x09,0x09,0xce] +0x07,0x09,0x09,0xce + +# GFX11: lds_param_load v8, attr3.z wait_vdst:8 ; encoding: [0x08,0x0e,0x08,0xce] +0x08,0x0e,0x08,0xce + +# GFX11: lds_param_load v9, attr4.w wait_vdst:7 ; 
encoding: [0x09,0x13,0x07,0xce] +0x09,0x13,0x07,0xce + +# GFX11: v_mul_f32_e32 v144, v65, v152 ; encoding: [0x41,0x31,0x21,0x11] +0x41,0x31,0x21,0x11 + +# GFX11: s_load_b32 s101, s[2:3], s0 ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[100:101], s0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[2:3], null ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_load_b32 s5, s[2:3], 0x1234 glc dlc ; encoding: [0x41,0x61,0x00,0xf4,0x34,0x12,0x00,0xf8] +0x41,0x61,0x00,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_load_b32 s5, s[2:3], m0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_load_b32 s5, s[2:3], s0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[2:3], s0 dlc ; encoding: [0x41,0x21,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x21,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[2:3], s0 glc ; encoding: [0x41,0x41,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x41,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[2:3], s0 glc dlc ; encoding: [0x41,0x61,0x00,0xf4,0x00,0x00,0x00,0x00] +0x41,0x61,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_load_b32 s5, s[2:3], vcc_hi ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_load_b32 s5, s[2:3], vcc_lo ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4] +0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_load_b32 s5, s[4:5], s0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 s5, vcc, s0 ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] +0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 vcc_hi, s[2:3], s0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] +0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b32 vcc_lo, s[2:3], s0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00] +0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[100:101], s0 ; encoding: [0x32,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] +0x32,0x05,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[2:3], null ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_load_b512 s[20:35], s[2:3], 0x1234 glc dlc ; encoding: [0x01,0x65,0x10,0xf4,0x34,0x12,0x00,0xf8] +0x01,0x65,0x10,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_load_b512 s[20:35], s[2:3], m0 ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_load_b512 s[20:35], s[2:3], s0 ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[2:3], s0 dlc ; encoding: [0x01,0x25,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x25,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[2:3], s0 glc ; encoding: [0x01,0x45,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x45,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[2:3], s0 glc dlc ; encoding: [0x01,0x65,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x65,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], s[2:3], s101 ; encoding: 
[0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xca] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_load_b512 s[20:35], s[2:3], vcc_hi ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_load_b512 s[20:35], s[2:3], vcc_lo ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_load_b512 s[20:35], s[4:5], s0 ; encoding: [0x02,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[20:35], vcc, s0 ; encoding: [0x35,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] +0x35,0x05,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[24:39], s[2:3], s0 ; encoding: [0x01,0x06,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x06,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b512 s[84:99], s[2:3], s0 ; encoding: [0x01,0x15,0x10,0xf4,0x00,0x00,0x00,0x00] +0x01,0x15,0x10,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x04,0xf4,0x00,0x00,0x00,0x00] +0x01,0x19,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[100:101], s0 ; encoding: [0xb2,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] +0xb2,0x02,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[2:3], null ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xf8] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_load_b64 s[10:11], s[2:3], 0x1234 glc dlc ; encoding: [0x81,0x62,0x04,0xf4,0x34,0x12,0x00,0xf8] +0x81,0x62,0x04,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_load_b64 s[10:11], s[2:3], m0 ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_load_b64 s[10:11], s[2:3], s0 ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[2:3], s0 dlc ; encoding: [0x81,0x22,0x04,0xf4,0x00,0x00,0x00,0x00] +0x81,0x22,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x42,0x04,0xf4,0x00,0x00,0x00,0x00] +0x81,0x42,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[2:3], s0 glc dlc ; encoding: [0x81,0x62,0x04,0xf4,0x00,0x00,0x00,0x00] +0x81,0x62,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xca] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_load_b64 s[10:11], s[2:3], vcc_hi ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xd6] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_load_b64 s[10:11], s[2:3], vcc_lo ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xd4] +0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_load_b64 s[10:11], s[4:5], s0 ; encoding: [0x82,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] +0x82,0x02,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[10:11], vcc, s0 ; encoding: [0xb5,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] +0xb5,0x02,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 s[12:13], s[2:3], s0 ; encoding: [0x01,0x03,0x04,0xf4,0x00,0x00,0x00,0x00] +0x01,0x03,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b64 vcc, s[2:3], s0 ; encoding: [0x81,0x1a,0x04,0xf4,0x00,0x00,0x00,0x00] +0x81,0x1a,0x04,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[100:101], s0 ; encoding: [0x32,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] +0x32,0x05,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[2:3], null ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_load_b128 s[20:23], s[2:3], 0x1234 glc dlc ; encoding: 
[0x01,0x65,0x08,0xf4,0x34,0x12,0x00,0xf8] +0x01,0x65,0x08,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_load_b128 s[20:23], s[2:3], m0 ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_load_b128 s[20:23], s[2:3], s0 ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[2:3], s0 dlc ; encoding: [0x01,0x25,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x25,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x45,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x45,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[2:3], s0 glc dlc ; encoding: [0x01,0x65,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x65,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xca] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_load_b128 s[20:23], s[2:3], vcc_hi ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_load_b128 s[20:23], s[2:3], vcc_lo ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_load_b128 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[20:23], vcc, s0 ; encoding: [0x35,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] +0x35,0x05,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[24:27], s[2:3], s0 ; encoding: [0x01,0x06,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x06,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b128 s[96:99], s[2:3], s0 ; encoding: [0x01,0x18,0x08,0xf4,0x00,0x00,0x00,0x00] +0x01,0x18,0x08,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[100:101], s0 ; encoding: [0x32,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x32,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[2:3], null ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xf8] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_load_b256 s[20:27], s[2:3], 0x1234 glc dlc ; encoding: [0x01,0x65,0x0c,0xf4,0x34,0x12,0x00,0xf8] +0x01,0x65,0x0c,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_load_b256 s[20:27], s[2:3], m0 ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_load_b256 s[20:27], s[2:3], s0 ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[2:3], s0 dlc ; encoding: [0x01,0x25,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x25,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[2:3], s0 glc ; encoding: [0x01,0x45,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x45,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[2:3], s0 glc dlc ; encoding: [0x01,0x65,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x65,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], s[2:3], s101 ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xca] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_load_b256 s[20:27], s[2:3], vcc_hi ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xd6] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_load_b256 s[20:27], s[2:3], vcc_lo ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xd4] +0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_load_b256 s[20:27], s[4:5], s0 ; encoding: [0x02,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[20:27], vcc, s0 ; 
encoding: [0x35,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x35,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[24:31], s[2:3], s0 ; encoding: [0x01,0x06,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x06,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_load_b256 s[92:99], s[2:3], s0 ; encoding: [0x01,0x17,0x0c,0xf4,0x00,0x00,0x00,0x00] +0x01,0x17,0x0c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x19,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[4:7], null ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xf8] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_buffer_load_b32 s5, s[4:7], 0x1234 glc dlc ; encoding: [0x42,0x61,0x20,0xf4,0x34,0x12,0x00,0xf8] +0x42,0x61,0x20,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_buffer_load_b32 s5, s[4:7], m0 ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_buffer_load_b32 s5, s[4:7], s0 ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[4:7], s0 dlc ; encoding: [0x42,0x21,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x21,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[4:7], s0 glc ; encoding: [0x42,0x41,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x41,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[4:7], s0 glc dlc ; encoding: [0x42,0x61,0x20,0xf4,0x00,0x00,0x00,0x00] +0x42,0x61,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[4:7], s101 ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xca] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_buffer_load_b32 s5, s[4:7], vcc_hi ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xd6] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_buffer_load_b32 s5, s[4:7], vcc_lo ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xd4] +0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_buffer_load_b32 s5, s[8:11], s0 ; encoding: [0x44,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] +0x44,0x01,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 s5, s[96:99], s0 ; encoding: [0x70,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] +0x70,0x01,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 vcc_hi, s[4:7], s0 ; encoding: [0xc2,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00] +0xc2,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b32 vcc_lo, s[4:7], s0 ; encoding: [0x82,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00] +0x82,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], null ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], 0x1234 glc dlc ; encoding: [0x02,0x65,0x30,0xf4,0x34,0x12,0x00,0xf8] +0x02,0x65,0x30,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], m0 ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], s0 ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], s0 dlc ; encoding: [0x02,0x25,0x30,0xf4,0x00,0x00,0x00,0x00] +0x02,0x25,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], s0 glc ; encoding: [0x02,0x45,0x30,0xf4,0x00,0x00,0x00,0x00] +0x02,0x45,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], s0 glc dlc ; encoding: [0x02,0x65,0x30,0xf4,0x00,0x00,0x00,0x00] 
+0x02,0x65,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], s101 ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xca] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], vcc_hi ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_buffer_load_b512 s[20:35], s[4:7], vcc_lo ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_buffer_load_b512 s[20:35], s[8:11], s0 ; encoding: [0x04,0x05,0x30,0xf4,0x00,0x00,0x00,0x00] +0x04,0x05,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[20:35], s[96:99], s0 ; encoding: [0x30,0x05,0x30,0xf4,0x00,0x00,0x00,0x00] +0x30,0x05,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[24:39], s[4:7], s0 ; encoding: [0x02,0x06,0x30,0xf4,0x00,0x00,0x00,0x00] +0x02,0x06,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b512 s[84:99], s[4:7], s0 ; encoding: [0x02,0x15,0x30,0xf4,0x00,0x00,0x00,0x00] +0x02,0x15,0x30,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x24,0xf4,0x00,0x00,0x00,0x00] +0x02,0x19,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], null ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], 0x1234 glc dlc ; encoding: [0x82,0x62,0x24,0xf4,0x34,0x12,0x00,0xf8] +0x82,0x62,0x24,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], m0 ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0x00] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], s0 dlc ; encoding: [0x82,0x22,0x24,0xf4,0x00,0x00,0x00,0x00] +0x82,0x22,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x42,0x24,0xf4,0x00,0x00,0x00,0x00] +0x82,0x42,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], s0 glc dlc ; encoding: [0x82,0x62,0x24,0xf4,0x00,0x00,0x00,0x00] +0x82,0x62,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], s101 ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xca] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], vcc_hi ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xd6] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_buffer_load_b64 s[10:11], s[4:7], vcc_lo ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xd4] +0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_buffer_load_b64 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0x24,0xf4,0x00,0x00,0x00,0x00] +0x84,0x02,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0x24,0xf4,0x00,0x00,0x00,0x00] +0xb0,0x02,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x24,0xf4,0x00,0x00,0x00,0x00] +0x02,0x03,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b64 vcc, s[4:7], s0 ; encoding: [0x82,0x1a,0x24,0xf4,0x00,0x00,0x00,0x00] +0x82,0x1a,0x24,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], null ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], 0x1234 glc dlc ; encoding: 
[0x02,0x65,0x28,0xf4,0x34,0x12,0x00,0xf8] +0x02,0x65,0x28,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], m0 ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], s0 ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], s0 dlc ; encoding: [0x02,0x25,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x25,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x45,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x45,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], s0 glc dlc ; encoding: [0x02,0x65,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x65,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xca] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], vcc_hi ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_buffer_load_b128 s[20:23], s[4:7], vcc_lo ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_buffer_load_b128 s[20:23], s[8:11], s0 ; encoding: [0x04,0x05,0x28,0xf4,0x00,0x00,0x00,0x00] +0x04,0x05,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[20:23], s[96:99], s0 ; encoding: [0x30,0x05,0x28,0xf4,0x00,0x00,0x00,0x00] +0x30,0x05,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[24:27], s[4:7], s0 ; encoding: [0x02,0x06,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x06,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b128 s[96:99], s[4:7], s0 ; encoding: [0x02,0x18,0x28,0xf4,0x00,0x00,0x00,0x00] +0x02,0x18,0x28,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], null ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xf8] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xf8 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], 0x1234 glc dlc ; encoding: [0x02,0x65,0x2c,0xf4,0x34,0x12,0x00,0xf8] +0x02,0x65,0x2c,0xf4,0x34,0x12,0x00,0xf8 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], m0 ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], s0 ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], s0 dlc ; encoding: [0x02,0x25,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x25,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], s0 glc ; encoding: [0x02,0x45,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x45,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], s0 glc dlc ; encoding: [0x02,0x65,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x65,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], s101 ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xca] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xca + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], vcc_hi ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xd6] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xd6 + +# GFX11: s_buffer_load_b256 s[20:27], s[4:7], vcc_lo ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xd4] +0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xd4 + +# GFX11: s_buffer_load_b256 s[20:27], s[8:11], s0 ; encoding: [0x04,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x04,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: 
s_buffer_load_b256 s[20:27], s[96:99], s0 ; encoding: [0x30,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x30,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[24:31], s[4:7], s0 ; encoding: [0x02,0x06,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x06,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_buffer_load_b256 s[92:99], s[4:7], s0 ; encoding: [0x02,0x17,0x2c,0xf4,0x00,0x00,0x00,0x00] +0x02,0x17,0x2c,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_dcache_inv ; encoding: [0x00,0x00,0x84,0xf4,0x00,0x00,0x00,0x00] +0x00,0x00,0x84,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_gl1_inv ; encoding: [0x00,0x00,0x80,0xf4,0x00,0x00,0x00,0x00] +0x00,0x00,0x80,0xf4,0x00,0x00,0x00,0x00 + +# GFX11: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8] +0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8 + +# GFX11: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0x12] +0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0x12 + +# GFX11: s_atc_probe 7, s[4:5], s2 ; encoding: [0xc2,0x01,0x88,0xf4,0x00,0x00,0x00,0x04] +0xc2,0x01,0x88,0xf4,0x00,0x00,0x00,0x04 + +# GFX11: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0xf8] +0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0xf8 + +# GFX11: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0x12] +0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0x12 + +# GFX11: s_atc_probe_buffer 7, s[8:11], s2 ; encoding: [0xc4,0x01,0x8c,0xf4,0x00,0x00,0x00,0x04] +0xc4,0x01,0x8c,0xf4,0x00,0x00,0x00,0x04 + +# GFX11: v_add3_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x55,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x55,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_add3_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x55,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_add3_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x55,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_add3_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x55,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_add3_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x55,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_add3_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: 
v_add3_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x55,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_add3_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x55,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_add3_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x55,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_add3_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x55,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_add3_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x55,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_add3_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_add3_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_add3_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_add3_u32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_add3_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x55,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_add3_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_add3_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_add3_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x55,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_add3_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x55,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_add3_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x55,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_add3_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_add3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x55,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x55,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 + +# GFX11: v_add3_u32_e64_dpp v5, v1, v2, s4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x55,0xd6,0xe9,0x04,0x12,0x00,0x01,0x77,0x39,0x05 + +# W32: v_add_co_u32 v255, s0, v1, v2 ; 
encoding: [0xff,0x00,0x00,0xd7,0x01,0x05,0x02,0x00] +# W64: v_add_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x00,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x00,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0xf0,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x80,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x80,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0xc1,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0xf7,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x7f,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x7e,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7d,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x7d,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x67,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x67,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x01,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x00,0xd7,0x01,0xe1,0x01,0x00 + +# W32: v_add_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x01,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x00,0xd7,0x01,0x01,0x01,0x00 + +# W32: v_add_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x83,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x00,0xd7,0x01,0x83,0x01,0x00 + +# W32: v_add_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xef,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x00,0xd7,0x01,0xef,0x01,0x00 + +# W32: v_add_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xff,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xff,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, exec_lo ; 
encoding: [0x05,0x00,0x00,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xfd,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xfb,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xfb,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xcf,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xcf,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x05,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0x05,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xff,0x03,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x00,0xd7,0x01,0xff,0x03,0x00 + +# W32: v_add_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x05,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x00,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xd7,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x00,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x00,0xd7,0x01,0xd5,0x00,0x00 + +# W32: v_add_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xff,0x05,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x00,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x00,0xd7,0xff,0x05,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x6b,0x04,0x02,0x00 + +# W32: v_add_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x00,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x00,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x27,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], exec, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], s[102:103], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x66,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], s[2:3], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], 
s[4:5], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x27,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x27,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x27,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x27,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], exec ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], s[102:103] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xcd,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0xcd,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], s[4:5] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], s[6:7] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] clamp ; encoding: [0x05,0x80,0x27,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x27,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] div:2 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], -v[1:2], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_add_f64 v[5:6], v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_add_f64 v[5:6], -v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] mul:2 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] mul:4 ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_add_f64 v[5:6], v[1:2], v[254:255] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x27,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], vcc ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], v[254:255], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x27,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], vcc, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_add_f64 v[5:6], v[1:2], null ; encoding: [0x05,0x00,0x27,0xd7,0x01,0xf9,0x00,0x00] +0x05,0x00,0x27,0xd7,0x01,0xf9,0x00,0x00 + +# GFX11: v_add_f64 v[5:6], null, v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x7c,0x04,0x02,0x00] +0x05,0x00,0x27,0xd7,0x7c,0x04,0x02,0x00 + +# GFX11: v_add_lshl_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x47,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x47,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, 0, v2, v3 ; encoding: 
[0x05,0x00,0x47,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x47,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x47,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x47,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x47,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x47,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_add_lshl_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x47,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_add_lshl_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x47,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_add_lshl_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x47,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_add_lshl_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x47,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_add_lshl_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_add_lshl_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_add_lshl_u32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0x96,0x01 + 
+# GFX11: v_add_lshl_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_add_lshl_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x47,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_add_lshl_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_add_lshl_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x47,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_add_lshl_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x47,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_add_lshl_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x47,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_add_lshl_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x47,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x47,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_add_nc_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0d,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x0d,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x0d,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_add_nc_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x0d,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_add_nc_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, s101 ; encoding: 
[0x05,0x00,0x0d,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x0d,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_add_nc_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x0d,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_add_nc_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x0d,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v255, v1, v2 ; encoding: [0xff,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x26,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x26,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, 0, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, -1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x26,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, m0, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, s103, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, s1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x26,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_add_nc_i32 v5, v1, 0 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x26,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_add_nc_i32 v5, v1, -1 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x26,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_add_nc_i32 v5, v1, -4.0 clamp ; encoding: [0x05,0x80,0x26,0xd7,0x01,0xef,0x01,0x00] +0x05,0x80,0x26,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_add_nc_i32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x26,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_add_nc_i32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v1, m0 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xfb,0x00,0x00 + +# 
GFX11: v_add_nc_i32 v5, v1, s103 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v1, s2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v1, v255 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x26,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x26,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x26,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_add_nc_i32 v5, v255, v2 ; encoding: [0x05,0x00,0x26,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x26,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_add_nc_i32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x26,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x03,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_add_nc_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, 0xc400 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x80,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x03,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_add_nc_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: 
v_add_nc_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x03,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x03,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_add_nc_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x03,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_add_nc_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_alignbit_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x16,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x16,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x16,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_alignbit_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_alignbit_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_alignbit_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0xf6,0x01 + +# 
GFX11: v_alignbit_b32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_alignbit_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x16,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_alignbit_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_alignbit_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x16,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_alignbit_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x16,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_alignbit_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x16,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_alignbit_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x16,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x16,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x17,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x17,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x17,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, v2, m0 ; 
encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_alignbyte_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x17,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_alignbyte_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x17,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_alignbyte_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x17,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_alignbyte_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x17,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_alignbyte_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x17,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x57,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x57,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x57,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_and_or_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x57,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_and_or_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x57,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_and_or_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x57,0xd6,0x01,0xef,0x0d,0x04 + +# 
GFX11: v_and_or_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x57,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_and_or_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x57,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_and_or_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x57,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_and_or_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x57,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_and_or_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x57,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_and_or_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_and_or_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_and_or_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_and_or_b32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_and_or_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x57,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_and_or_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_and_or_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x57,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_and_or_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x57,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_and_or_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x57,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_and_or_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x57,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x57,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_ashrrev_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x80,0x04,0x02,0x00] 
+0x05,0x00,0x3a,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x3a,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x3a,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x3a,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x3a,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x3a,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3a,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_ashrrev_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x3a,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x3a,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x3a,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[254:255], v1, v[2:3] ; encoding: [0xfe,0x00,0x3e,0xd7,0x01,0x05,0x02,0x00] 
+0xfe,0x00,0x3e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], exec_hi, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], m0, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], s101, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], s1, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, 0.5 ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, 0 ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, -1 ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, -4.0 ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, exec ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, s[100:101] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xc9,0x00,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xc9,0x00,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, s[4:5] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, s[6:7] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, v[254:255] ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], v255, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x3e,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], vcc_hi, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_ashrrev_i64 v[5:6], vcc_lo, v[2:3] ; encoding: [0x05,0x00,0x3e,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x3e,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: 
v_bcnt_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x1e,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x1e,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x1e,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_bcnt_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x1e,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_bcnt_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x1e,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x1e,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_bfe_i32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x11,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x11,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, 0, v2, v3 ; encoding: 
[0x05,0x00,0x11,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x11,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_bfe_i32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x11,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_bfe_i32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x11,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_bfe_i32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x11,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_bfe_i32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x11,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_bfe_i32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x11,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_bfe_i32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x11,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_bfe_i32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x11,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_bfe_i32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x11,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_bfe_i32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_bfe_i32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_bfe_i32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_bfe_i32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_bfe_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00] 
+0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_bfe_i32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x11,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_bfe_i32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_bfe_i32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x11,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_bfe_i32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x11,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_bfe_i32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x11,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_bfe_i32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x11,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x11,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x10,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x10,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x10,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_bfe_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x10,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_bfe_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x10,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_bfe_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x10,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_bfe_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, 
v1, s103, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x10,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_bfe_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x10,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_bfe_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x10,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_bfe_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x10,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_bfe_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x10,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_bfe_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_bfe_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_bfe_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_bfe_u32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_bfe_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_bfe_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x10,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_bfe_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_bfe_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x10,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_bfe_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x10,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_bfe_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x10,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x10,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x10,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00] +0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00 + +# GFX11: v_bfi_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, 0, v2, v3 ; encoding: 
[0x05,0x00,0x12,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x12,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_bfi_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x12,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_bfi_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x12,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_bfi_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x12,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_bfi_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x12,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_bfi_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x12,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_bfi_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x12,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_bfi_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x12,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_bfi_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x12,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_bfi_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_bfi_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_bfi_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_bfi_b32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_bfi_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00] 
+0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_bfi_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x12,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_bfi_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_bfi_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x12,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_bfi_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x12,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_bfi_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x12,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_bfi_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x12,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x12,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_bfm_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_bfm_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_bfm_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x1d,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_bfm_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x1d,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_bfm_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_bfm_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xcf,0x00,0x00] 
+0x05,0x00,0x1d,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_bfm_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_bfm_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x1d,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_bfm_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x1d,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_bfm_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_bfm_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x1d,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x1d,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_bfrev_b32_e32 v255, v1 ; encoding: [0x01,0x71,0xfe,0x7f] +0x01,0x71,0xfe,0x7f + +# GFX11: v_bfrev_b32_e32 v5, 0.5 ; encoding: [0xf0,0x70,0x0a,0x7e] +0xf0,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, 0 ; encoding: [0x80,0x70,0x0a,0x7e] +0x80,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x70,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x70,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_bfrev_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x70,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x70,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_bfrev_b32_e32 v5, -1 ; encoding: [0xc1,0x70,0x0a,0x7e] +0xc1,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, -4.0 ; encoding: [0xf7,0x70,0x0a,0x7e] +0xf7,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, exec_hi ; encoding: [0x7f,0x70,0x0a,0x7e] +0x7f,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, exec_lo ; encoding: [0x7e,0x70,0x0a,0x7e] +0x7e,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, m0 ; encoding: [0x7d,0x70,0x0a,0x7e] +0x7d,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, s103 ; encoding: [0x67,0x70,0x0a,0x7e] +0x67,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, s1 ; encoding: [0x01,0x70,0x0a,0x7e] +0x01,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] +0x01,0x71,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, v255 ; encoding: [0xff,0x71,0x0a,0x7e] +0xff,0x71,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x70,0x0a,0x7e] +0x6b,0x70,0x0a,0x7e + +# GFX11: v_bfrev_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x70,0x0a,0x7e] +0x6a,0x70,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v255, v1 ; encoding: [0x01,0x45,0xfe,0x7f] +0x01,0x45,0xfe,0x7f + +# GFX11: v_ceil_f32_e32 v5, 0.5 ; encoding: [0xf0,0x44,0x0a,0x7e] +0xf0,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, 0 ; encoding: [0x80,0x44,0x0a,0x7e] +0x80,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x44,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x44,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_ceil_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x44,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x44,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f32_e32 v5, -1 ; encoding: [0xc1,0x44,0x0a,0x7e] +0xc1,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, -4.0 ; encoding: [0xf7,0x44,0x0a,0x7e] +0xf7,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, exec_hi ; encoding: [0x7f,0x44,0x0a,0x7e] +0x7f,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, exec_lo ; encoding: [0x7e,0x44,0x0a,0x7e] 
+0x7e,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, m0 ; encoding: [0x7d,0x44,0x0a,0x7e] +0x7d,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, s103 ; encoding: [0x67,0x44,0x0a,0x7e] +0x67,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, s1 ; encoding: [0x01,0x44,0x0a,0x7e] +0x01,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, v1 ; encoding: [0x01,0x45,0x0a,0x7e] +0x01,0x45,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, v255 ; encoding: [0xff,0x45,0x0a,0x7e] +0xff,0x45,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x44,0x0a,0x7e] +0x6b,0x44,0x0a,0x7e + +# GFX11: v_ceil_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x44,0x0a,0x7e] +0x6a,0x44,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x31,0xfc,0x7f] +0x01,0x31,0xfc,0x7f + +# GFX11: v_ceil_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x30,0x0a,0x7e] +0xf0,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], 0 ; encoding: [0x80,0x30,0x0a,0x7e] +0x80,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x30,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x30,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_ceil_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x30,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x30,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x30,0x0a,0x7e] +0xc1,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x30,0x0a,0x7e] +0xf7,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], exec ; encoding: [0x7e,0x30,0x0a,0x7e] +0x7e,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x30,0x0a,0x7e] +0x66,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x30,0x0a,0x7e] +0x02,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x30,0x0a,0x7e] +0x04,0x30,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x31,0x0a,0x7e] +0x01,0x31,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x31,0x0a,0x7e] +0xfe,0x31,0x0a,0x7e + +# GFX11: v_ceil_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x30,0x0a,0x7e] +0x6a,0x30,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v255, v1 ; encoding: [0x01,0x77,0xfe,0x7f] +0x01,0x77,0xfe,0x7f + +# GFX11: v_cls_i32_e32 v5, 0.5 ; encoding: [0xf0,0x76,0x0a,0x7e] +0xf0,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, 0 ; encoding: [0x80,0x76,0x0a,0x7e] +0x80,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, 0x3f717273 ; encoding: [0xff,0x76,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x76,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cls_i32_e32 v5, 0xaf123456 ; encoding: [0xff,0x76,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x76,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cls_i32_e32 v5, -1 ; encoding: [0xc1,0x76,0x0a,0x7e] +0xc1,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, -4.0 ; encoding: [0xf7,0x76,0x0a,0x7e] +0xf7,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, exec_hi ; encoding: [0x7f,0x76,0x0a,0x7e] +0x7f,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, exec_lo ; encoding: [0x7e,0x76,0x0a,0x7e] +0x7e,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, m0 ; encoding: [0x7d,0x76,0x0a,0x7e] +0x7d,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, s103 ; encoding: [0x67,0x76,0x0a,0x7e] +0x67,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, s1 ; encoding: [0x01,0x76,0x0a,0x7e] +0x01,0x76,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, v1 ; encoding: [0x01,0x77,0x0a,0x7e] +0x01,0x77,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, v255 ; encoding: [0xff,0x77,0x0a,0x7e] +0xff,0x77,0x0a,0x7e + +# GFX11: v_cls_i32_e32 v5, vcc_hi ; encoding: [0x6b,0x76,0x0a,0x7e] +0x6b,0x76,0x0a,0x7e + +# GFX11: 
v_cls_i32_e32 v5, vcc_lo ; encoding: [0x6a,0x76,0x0a,0x7e] +0x6a,0x76,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v255, v1 ; encoding: [0x01,0x75,0xfe,0x7f] +0x01,0x75,0xfe,0x7f + +# GFX11: v_ctz_i32_b32_e32 v5, 0.5 ; encoding: [0xf0,0x74,0x0a,0x7e] +0xf0,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, 0 ; encoding: [0x80,0x74,0x0a,0x7e] +0x80,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x74,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x74,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_ctz_i32_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x74,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x74,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_ctz_i32_b32_e32 v5, -1 ; encoding: [0xc1,0x74,0x0a,0x7e] +0xc1,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, -4.0 ; encoding: [0xf7,0x74,0x0a,0x7e] +0xf7,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, exec_hi ; encoding: [0x7f,0x74,0x0a,0x7e] +0x7f,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, exec_lo ; encoding: [0x7e,0x74,0x0a,0x7e] +0x7e,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, m0 ; encoding: [0x7d,0x74,0x0a,0x7e] +0x7d,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, s103 ; encoding: [0x67,0x74,0x0a,0x7e] +0x67,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, s1 ; encoding: [0x01,0x74,0x0a,0x7e] +0x01,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, v1 ; encoding: [0x01,0x75,0x0a,0x7e] +0x01,0x75,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, v255 ; encoding: [0xff,0x75,0x0a,0x7e] +0xff,0x75,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x74,0x0a,0x7e] +0x6b,0x74,0x0a,0x7e + +# GFX11: v_ctz_i32_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x74,0x0a,0x7e] +0x6a,0x74,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v255, v1 ; encoding: [0x01,0x73,0xfe,0x7f] +0x01,0x73,0xfe,0x7f + +# GFX11: v_clz_i32_u32_e32 v5, 0.5 ; encoding: [0xf0,0x72,0x0a,0x7e] +0xf0,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, 0 ; encoding: [0x80,0x72,0x0a,0x7e] +0x80,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, 0x3f717273 ; encoding: [0xff,0x72,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x72,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_clz_i32_u32_e32 v5, 0xaf123456 ; encoding: [0xff,0x72,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x72,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_clz_i32_u32_e32 v5, -1 ; encoding: [0xc1,0x72,0x0a,0x7e] +0xc1,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, -4.0 ; encoding: [0xf7,0x72,0x0a,0x7e] +0xf7,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, exec_hi ; encoding: [0x7f,0x72,0x0a,0x7e] +0x7f,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, exec_lo ; encoding: [0x7e,0x72,0x0a,0x7e] +0x7e,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, m0 ; encoding: [0x7d,0x72,0x0a,0x7e] +0x7d,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, s103 ; encoding: [0x67,0x72,0x0a,0x7e] +0x67,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, s1 ; encoding: [0x01,0x72,0x0a,0x7e] +0x01,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, v1 ; encoding: [0x01,0x73,0x0a,0x7e] +0x01,0x73,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, v255 ; encoding: [0xff,0x73,0x0a,0x7e] +0xff,0x73,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, vcc_hi ; encoding: [0x6b,0x72,0x0a,0x7e] +0x6b,0x72,0x0a,0x7e + +# GFX11: v_clz_i32_u32_e32 v5, vcc_lo ; encoding: [0x6a,0x72,0x0a,0x7e] +0x6a,0x72,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v255, v1 ; encoding: [0x01,0x6d,0xfe,0x7f] +0x01,0x6d,0xfe,0x7f + +# GFX11: v_cos_f32_e32 v5, 0.5 ; encoding: [0xf0,0x6c,0x0a,0x7e] +0xf0,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, 0 ; encoding: [0x80,0x6c,0x0a,0x7e] +0x80,0x6c,0x0a,0x7e + +# GFX11: 
v_cos_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x6c,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x6c,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cos_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x6c,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x6c,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cos_f32_e32 v5, -1 ; encoding: [0xc1,0x6c,0x0a,0x7e] +0xc1,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, -4.0 ; encoding: [0xf7,0x6c,0x0a,0x7e] +0xf7,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, exec_hi ; encoding: [0x7f,0x6c,0x0a,0x7e] +0x7f,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, exec_lo ; encoding: [0x7e,0x6c,0x0a,0x7e] +0x7e,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, m0 ; encoding: [0x7d,0x6c,0x0a,0x7e] +0x7d,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, s103 ; encoding: [0x67,0x6c,0x0a,0x7e] +0x67,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, s1 ; encoding: [0x01,0x6c,0x0a,0x7e] +0x01,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, v1 ; encoding: [0x01,0x6d,0x0a,0x7e] +0x01,0x6d,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, v255 ; encoding: [0xff,0x6d,0x0a,0x7e] +0xff,0x6d,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x6c,0x0a,0x7e] +0x6b,0x6c,0x0a,0x7e + +# GFX11: v_cos_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x6c,0x0a,0x7e] +0x6a,0x6c,0x0a,0x7e + +# GFX11: v_cubeid_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_cubeid_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0c,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_cubeid_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0c,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_cubeid_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_cubeid_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfb,0x0c,0x04] 
+0x05,0x00,0x0c,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_cubeid_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_cubeid_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_cubeid_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_cubeid_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_cubeid_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_cubeid_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_cubeid_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_cubeid_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cubeid_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_cubeid_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x0c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x0c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_cubeid_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_cubeid_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_cubeid_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_cubeid_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_cubeid_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_cubeid_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_cubeid_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_cubeid_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0c,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_cubeid_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v1, vcc_lo, v3 ; encoding: 
[0x05,0x00,0x0c,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0c,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_cubeid_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_cubeid_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0c,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0c,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_cubema_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0f,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_cubema_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0f,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_cubema_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_cubema_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_cubema_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_cubema_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_cubema_f32 v5, v1, v2, -4.0 ; encoding: 
[0x05,0x00,0x0f,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_cubema_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_cubema_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_cubema_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_cubema_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_cubema_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_cubema_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cubema_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_cubema_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x0f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x0f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_cubema_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_cubema_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_cubema_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_cubema_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_cubema_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_cubema_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_cubema_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_cubema_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0f,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_cubema_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0f,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_cubema_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_cubema_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0f,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0f,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: 
v_cubesc_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_cubesc_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0d,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_cubesc_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0d,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_cubesc_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_cubesc_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_cubesc_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_cubesc_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_cubesc_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_cubesc_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_cubesc_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_cubesc_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_cubesc_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_cubesc_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: 
v_cubesc_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cubesc_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_cubesc_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x0d,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x0d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_cubesc_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_cubesc_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_cubesc_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_cubesc_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_cubesc_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_cubesc_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_cubesc_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_cubesc_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0d,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_cubesc_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0d,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_cubesc_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0d,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0d,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_cubesc_f32_e64_dpp v5, v1, v2, 1 row_shr:4 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff] +0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x02,0x01,0x14,0x01,0xff + +# GFX11: v_cubesc_f32_e64_dpp v5, v1, v2, s2 row_shr:4 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff] +0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0a,0x00,0x01,0x14,0x01,0xff + +# GFX11: v_cubetc_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0xf7,0x04,0x0e,0x04] 
+0x05,0x00,0x0e,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_cubetc_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0e,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_cubetc_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0e,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_cubetc_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_cubetc_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_cubetc_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_cubetc_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_cubetc_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_cubetc_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_cubetc_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_cubetc_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_cubetc_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_cubetc_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_cubetc_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cubetc_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_cubetc_f32 v5, v1, v2, v3 clamp ; encoding: 
[0x05,0x80,0x0e,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x0e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_cubetc_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_cubetc_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_cubetc_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_cubetc_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_cubetc_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_cubetc_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_cubetc_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_cubetc_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0e,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_cubetc_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0e,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_cubetc_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_cubetc_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0e,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0e,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_cvt_f16_f32_e32 v5, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e] +0xf0,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, 0 ; encoding: [0x80,0x14,0x0a,0x7e] +0x80,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x14,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x14,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f16_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x14,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x14,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f16_f32_e32 v5, -1 ; encoding: [0xc1,0x14,0x0a,0x7e] +0xc1,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, -4.0 ; encoding: [0xf7,0x14,0x0a,0x7e] +0xf7,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e] +0x7f,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e] +0x7e,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, m0 ; encoding: [0x7d,0x14,0x0a,0x7e] +0x7d,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, s103 ; encoding: [0x67,0x14,0x0a,0x7e] +0x67,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, s1 ; encoding: [0x01,0x14,0x0a,0x7e] +0x01,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e] +0x01,0x15,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, v255 ; encoding: [0xff,0x15,0x0a,0x7e] +0xff,0x15,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e] 
+0x6b,0x14,0x0a,0x7e + +# GFX11: v_cvt_f16_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e] +0x6a,0x14,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v255, v1 ; encoding: [0x01,0x17,0xfe,0x7f] +0x01,0x17,0xfe,0x7f + +# GFX11: v_cvt_f32_f16_e32 v5, 0 ; encoding: [0x80,0x16,0x0a,0x7e] +0x80,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, -1 ; encoding: [0xc1,0x16,0x0a,0x7e] +0xc1,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, exec_hi ; encoding: [0x7f,0x16,0x0a,0x7e] +0x7f,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, exec_lo ; encoding: [0x7e,0x16,0x0a,0x7e] +0x7e,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, m0 ; encoding: [0x7d,0x16,0x0a,0x7e] +0x7d,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, s103 ; encoding: [0x67,0x16,0x0a,0x7e] +0x67,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, s1 ; encoding: [0x01,0x16,0x0a,0x7e] +0x01,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, v1 ; encoding: [0x01,0x17,0x0a,0x7e] +0x01,0x17,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, vcc_hi ; encoding: [0x6b,0x16,0x0a,0x7e] +0x6b,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f16_e32 v5, vcc_lo ; encoding: [0x6a,0x16,0x0a,0x7e] +0x6a,0x16,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v255, v[1:2] ; encoding: [0x01,0x1f,0xfe,0x7f] +0x01,0x1f,0xfe,0x7f + +# GFX11: v_cvt_f32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x1e,0x0a,0x7e] +0xf0,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, 0 ; encoding: [0x80,0x1e,0x0a,0x7e] +0x80,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, 0x3f717273 ; encoding: [0xff,0x1e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x1e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f32_f64_e32 v5, 0xaf123456 ; encoding: [0xff,0x1e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x1e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_f64_e32 v5, -1 ; encoding: [0xc1,0x1e,0x0a,0x7e] +0xc1,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, -4.0 ; encoding: [0xf7,0x1e,0x0a,0x7e] +0xf7,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, exec ; encoding: [0x7e,0x1e,0x0a,0x7e] +0x7e,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, s[102:103] ; encoding: [0x66,0x1e,0x0a,0x7e] +0x66,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x1e,0x0a,0x7e] +0x02,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, s[4:5] ; encoding: [0x04,0x1e,0x0a,0x7e] +0x04,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x1f,0x0a,0x7e] +0x01,0x1f,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x1f,0x0a,0x7e] +0xfe,0x1f,0x0a,0x7e + +# GFX11: v_cvt_f32_f64_e32 v5, vcc ; encoding: [0x6a,0x1e,0x0a,0x7e] +0x6a,0x1e,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v255, v1 ; encoding: [0x01,0x23,0xfe,0x7f] +0x01,0x23,0xfe,0x7f + +# GFX11: v_cvt_f32_ubyte0_e32 v5, 0.5 ; encoding: [0xf0,0x22,0x0a,0x7e] +0xf0,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, 0 ; encoding: [0x80,0x22,0x0a,0x7e] +0x80,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, 0x3f717273 ; encoding: [0xff,0x22,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x22,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f32_ubyte0_e32 v5, 0xaf123456 ; encoding: [0xff,0x22,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x22,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte0_e32 v5, -1 ; encoding: [0xc1,0x22,0x0a,0x7e] +0xc1,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, -4.0 ; encoding: [0xf7,0x22,0x0a,0x7e] +0xf7,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, exec_hi ; encoding: [0x7f,0x22,0x0a,0x7e] +0x7f,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, exec_lo ; encoding: [0x7e,0x22,0x0a,0x7e] +0x7e,0x22,0x0a,0x7e + +# GFX11: 
v_cvt_f32_ubyte0_e32 v5, m0 ; encoding: [0x7d,0x22,0x0a,0x7e] +0x7d,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, s103 ; encoding: [0x67,0x22,0x0a,0x7e] +0x67,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, s1 ; encoding: [0x01,0x22,0x0a,0x7e] +0x01,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, v1 ; encoding: [0x01,0x23,0x0a,0x7e] +0x01,0x23,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, v255 ; encoding: [0xff,0x23,0x0a,0x7e] +0xff,0x23,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, vcc_hi ; encoding: [0x6b,0x22,0x0a,0x7e] +0x6b,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte0_e32 v5, vcc_lo ; encoding: [0x6a,0x22,0x0a,0x7e] +0x6a,0x22,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v255, v1 ; encoding: [0x01,0x25,0xfe,0x7f] +0x01,0x25,0xfe,0x7f + +# GFX11: v_cvt_f32_ubyte1_e32 v5, 0.5 ; encoding: [0xf0,0x24,0x0a,0x7e] +0xf0,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, 0 ; encoding: [0x80,0x24,0x0a,0x7e] +0x80,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, 0x3f717273 ; encoding: [0xff,0x24,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x24,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f32_ubyte1_e32 v5, 0xaf123456 ; encoding: [0xff,0x24,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x24,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte1_e32 v5, -1 ; encoding: [0xc1,0x24,0x0a,0x7e] +0xc1,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, -4.0 ; encoding: [0xf7,0x24,0x0a,0x7e] +0xf7,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, exec_hi ; encoding: [0x7f,0x24,0x0a,0x7e] +0x7f,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, exec_lo ; encoding: [0x7e,0x24,0x0a,0x7e] +0x7e,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, m0 ; encoding: [0x7d,0x24,0x0a,0x7e] +0x7d,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, s103 ; encoding: [0x67,0x24,0x0a,0x7e] +0x67,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, s1 ; encoding: [0x01,0x24,0x0a,0x7e] +0x01,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, v1 ; encoding: [0x01,0x25,0x0a,0x7e] +0x01,0x25,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, v255 ; encoding: [0xff,0x25,0x0a,0x7e] +0xff,0x25,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, vcc_hi ; encoding: [0x6b,0x24,0x0a,0x7e] +0x6b,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte1_e32 v5, vcc_lo ; encoding: [0x6a,0x24,0x0a,0x7e] +0x6a,0x24,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v255, v1 ; encoding: [0x01,0x27,0xfe,0x7f] +0x01,0x27,0xfe,0x7f + +# GFX11: v_cvt_f32_ubyte2_e32 v5, 0.5 ; encoding: [0xf0,0x26,0x0a,0x7e] +0xf0,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, 0 ; encoding: [0x80,0x26,0x0a,0x7e] +0x80,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, 0x3f717273 ; encoding: [0xff,0x26,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x26,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f32_ubyte2_e32 v5, 0xaf123456 ; encoding: [0xff,0x26,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x26,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte2_e32 v5, -1 ; encoding: [0xc1,0x26,0x0a,0x7e] +0xc1,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, -4.0 ; encoding: [0xf7,0x26,0x0a,0x7e] +0xf7,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, exec_hi ; encoding: [0x7f,0x26,0x0a,0x7e] +0x7f,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, exec_lo ; encoding: [0x7e,0x26,0x0a,0x7e] +0x7e,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, m0 ; encoding: [0x7d,0x26,0x0a,0x7e] +0x7d,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, s103 ; encoding: [0x67,0x26,0x0a,0x7e] +0x67,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, s1 ; encoding: [0x01,0x26,0x0a,0x7e] 
+0x01,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, v1 ; encoding: [0x01,0x27,0x0a,0x7e] +0x01,0x27,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, v255 ; encoding: [0xff,0x27,0x0a,0x7e] +0xff,0x27,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, vcc_hi ; encoding: [0x6b,0x26,0x0a,0x7e] +0x6b,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte2_e32 v5, vcc_lo ; encoding: [0x6a,0x26,0x0a,0x7e] +0x6a,0x26,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v255, v1 ; encoding: [0x01,0x29,0xfe,0x7f] +0x01,0x29,0xfe,0x7f + +# GFX11: v_cvt_f32_ubyte3_e32 v5, 0.5 ; encoding: [0xf0,0x28,0x0a,0x7e] +0xf0,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, 0 ; encoding: [0x80,0x28,0x0a,0x7e] +0x80,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, 0x3f717273 ; encoding: [0xff,0x28,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x28,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f32_ubyte3_e32 v5, 0xaf123456 ; encoding: [0xff,0x28,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x28,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte3_e32 v5, -1 ; encoding: [0xc1,0x28,0x0a,0x7e] +0xc1,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, -4.0 ; encoding: [0xf7,0x28,0x0a,0x7e] +0xf7,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, exec_hi ; encoding: [0x7f,0x28,0x0a,0x7e] +0x7f,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, exec_lo ; encoding: [0x7e,0x28,0x0a,0x7e] +0x7e,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, m0 ; encoding: [0x7d,0x28,0x0a,0x7e] +0x7d,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, s103 ; encoding: [0x67,0x28,0x0a,0x7e] +0x67,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, s1 ; encoding: [0x01,0x28,0x0a,0x7e] +0x01,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, v1 ; encoding: [0x01,0x29,0x0a,0x7e] +0x01,0x29,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, v255 ; encoding: [0xff,0x29,0x0a,0x7e] +0xff,0x29,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, vcc_hi ; encoding: [0x6b,0x28,0x0a,0x7e] +0x6b,0x28,0x0a,0x7e + +# GFX11: v_cvt_f32_ubyte3_e32 v5, vcc_lo ; encoding: [0x6a,0x28,0x0a,0x7e] +0x6a,0x28,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[254:255], v1 ; encoding: [0x01,0x21,0xfc,0x7f] +0x01,0x21,0xfc,0x7f + +# GFX11: v_cvt_f64_f32_e32 v[5:6], 0.5 ; encoding: [0xf0,0x20,0x0a,0x7e] +0xf0,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], 0 ; encoding: [0x80,0x20,0x0a,0x7e] +0x80,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x20,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x20,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f64_f32_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x20,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x20,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_f32_e32 v[5:6], -1 ; encoding: [0xc1,0x20,0x0a,0x7e] +0xc1,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], -4.0 ; encoding: [0xf7,0x20,0x0a,0x7e] +0xf7,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], exec_hi ; encoding: [0x7f,0x20,0x0a,0x7e] +0x7f,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], exec_lo ; encoding: [0x7e,0x20,0x0a,0x7e] +0x7e,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], m0 ; encoding: [0x7d,0x20,0x0a,0x7e] +0x7d,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], s103 ; encoding: [0x67,0x20,0x0a,0x7e] +0x67,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], s1 ; encoding: [0x01,0x20,0x0a,0x7e] +0x01,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], v1 ; encoding: [0x01,0x21,0x0a,0x7e] +0x01,0x21,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], v255 ; encoding: [0xff,0x21,0x0a,0x7e] +0xff,0x21,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], 
vcc_hi ; encoding: [0x6b,0x20,0x0a,0x7e] +0x6b,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_f32_e32 v[5:6], vcc_lo ; encoding: [0x6a,0x20,0x0a,0x7e] +0x6a,0x20,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[254:255], v1 ; encoding: [0x01,0x2d,0xfc,0x7f] +0x01,0x2d,0xfc,0x7f + +# GFX11: v_cvt_f64_u32_e32 v[5:6], 0.5 ; encoding: [0xf0,0x2c,0x0a,0x7e] +0xf0,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], 0 ; encoding: [0x80,0x2c,0x0a,0x7e] +0x80,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x2c,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x2c,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_f64_u32_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x2c,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x2c,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_u32_e32 v[5:6], -1 ; encoding: [0xc1,0x2c,0x0a,0x7e] +0xc1,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], -4.0 ; encoding: [0xf7,0x2c,0x0a,0x7e] +0xf7,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], exec_hi ; encoding: [0x7f,0x2c,0x0a,0x7e] +0x7f,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], exec_lo ; encoding: [0x7e,0x2c,0x0a,0x7e] +0x7e,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], m0 ; encoding: [0x7d,0x2c,0x0a,0x7e] +0x7d,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], s103 ; encoding: [0x67,0x2c,0x0a,0x7e] +0x67,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], s1 ; encoding: [0x01,0x2c,0x0a,0x7e] +0x01,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], v1 ; encoding: [0x01,0x2d,0x0a,0x7e] +0x01,0x2d,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], v255 ; encoding: [0xff,0x2d,0x0a,0x7e] +0xff,0x2d,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], vcc_hi ; encoding: [0x6b,0x2c,0x0a,0x7e] +0x6b,0x2c,0x0a,0x7e + +# GFX11: v_cvt_f64_u32_e32 v[5:6], vcc_lo ; encoding: [0x6a,0x2c,0x0a,0x7e] +0x6a,0x2c,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v255, v1 ; encoding: [0x01,0x1b,0xfe,0x7f] +0x01,0x1b,0xfe,0x7f + +# GFX11: v_cvt_floor_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x1a,0x0a,0x7e] +0xf0,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, 0 ; encoding: [0x80,0x1a,0x0a,0x7e] +0x80,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x1a,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x1a,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_floor_i32_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x1a,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x1a,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_floor_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x1a,0x0a,0x7e] +0xc1,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, -4.0 ; encoding: [0xf7,0x1a,0x0a,0x7e] +0xf7,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x1a,0x0a,0x7e] +0x7f,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x1a,0x0a,0x7e] +0x7e,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x1a,0x0a,0x7e] +0x7d,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, s103 ; encoding: [0x67,0x1a,0x0a,0x7e] +0x67,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, s1 ; encoding: [0x01,0x1a,0x0a,0x7e] +0x01,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, v1 ; encoding: [0x01,0x1b,0x0a,0x7e] +0x01,0x1b,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, v255 ; encoding: [0xff,0x1b,0x0a,0x7e] +0xff,0x1b,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x1a,0x0a,0x7e] +0x6b,0x1a,0x0a,0x7e + +# GFX11: v_cvt_floor_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x1a,0x0a,0x7e] +0x6a,0x1a,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 
v255, v1 ; encoding: [0x01,0x19,0xfe,0x7f] +0x01,0x19,0xfe,0x7f + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x18,0x0a,0x7e] +0xf0,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, 0 ; encoding: [0x80,0x18,0x0a,0x7e] +0x80,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x18,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x18,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x18,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x18,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x18,0x0a,0x7e] +0xc1,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, -4.0 ; encoding: [0xf7,0x18,0x0a,0x7e] +0xf7,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x18,0x0a,0x7e] +0x7f,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x18,0x0a,0x7e] +0x7e,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x18,0x0a,0x7e] +0x7d,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, s103 ; encoding: [0x67,0x18,0x0a,0x7e] +0x67,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, s1 ; encoding: [0x01,0x18,0x0a,0x7e] +0x01,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, v1 ; encoding: [0x01,0x19,0x0a,0x7e] +0x01,0x19,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, v255 ; encoding: [0xff,0x19,0x0a,0x7e] +0xff,0x19,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x18,0x0a,0x7e] +0x6b,0x18,0x0a,0x7e + +# GFX11: v_cvt_nearest_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x18,0x0a,0x7e] +0x6a,0x18,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v255, v1 ; encoding: [0x01,0x1d,0xfe,0x7f] +0x01,0x1d,0xfe,0x7f + +# GFX11: v_cvt_off_f32_i4_e32 v5, 0.5 ; encoding: [0xf0,0x1c,0x0a,0x7e] +0xf0,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, 0 ; encoding: [0x80,0x1c,0x0a,0x7e] +0x80,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, 0x41 ; encoding: [0xff,0x1c,0x0a,0x7e,0x41,0x00,0x00,0x00] +0xff,0x1c,0x0a,0x7e,0x41,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e32 v5, 0x4f ; encoding: [0xff,0x1c,0x0a,0x7e,0x4f,0x00,0x00,0x00] +0xff,0x1c,0x0a,0x7e,0x4f,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e32 v5, -1 ; encoding: [0xc1,0x1c,0x0a,0x7e] +0xc1,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, -4.0 ; encoding: [0xf7,0x1c,0x0a,0x7e] +0xf7,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, exec_hi ; encoding: [0x7f,0x1c,0x0a,0x7e] +0x7f,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, exec_lo ; encoding: [0x7e,0x1c,0x0a,0x7e] +0x7e,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, m0 ; encoding: [0x7d,0x1c,0x0a,0x7e] +0x7d,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, s103 ; encoding: [0x67,0x1c,0x0a,0x7e] +0x67,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, s1 ; encoding: [0x01,0x1c,0x0a,0x7e] +0x01,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, v1 ; encoding: [0x01,0x1d,0x0a,0x7e] +0x01,0x1d,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, v255 ; encoding: [0xff,0x1d,0x0a,0x7e] +0xff,0x1d,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, vcc_hi ; encoding: [0x6b,0x1c,0x0a,0x7e] +0x6b,0x1c,0x0a,0x7e + +# GFX11: v_cvt_off_f32_i4_e32 v5, vcc_lo ; encoding: [0x6a,0x1c,0x0a,0x7e] +0x6a,0x1c,0x0a,0x7e + +# GFX11: v_cvt_pk_i16_i32 v255, v1, v2 ; encoding: [0xff,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x24,0xd7,0xf0,0x04,0x02,0x00] 
+0x05,0x00,0x24,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, 0, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, -1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x24,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, m0, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, s103, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, s1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x24,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, 0 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x24,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, -1 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x24,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x24,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, m0 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, s103 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, s2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, v255 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x24,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x24,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, v255, v2 ; encoding: [0x05,0x00,0x24,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x24,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_i16_i32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x24,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, v2 ; encoding: 
[0x05,0x00,0x12,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x12,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x12,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x12,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x12,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x12,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x12,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x12,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v2 ; encoding: 
[0x05,0x00,0x12,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x12,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x12,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x21,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x21,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x21,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x21,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x21,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x21,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x21,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x21,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x21,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, -v1, v2 ; encoding: 
[0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_cvt_pknorm_i16_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x21,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x21,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x21,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x21,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x13,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x13,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x13,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x13,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x13,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, s101 ; encoding: 
[0x05,0x00,0x13,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x13,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x13,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x13,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x13,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x22,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x22,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x22,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x22,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x22,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, -4.0 ; encoding: 
[0x05,0x00,0x22,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x22,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x22,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x22,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x22,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_cvt_pknorm_u16_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x22,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x22,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x22,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x22,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x23,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, 0, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, -1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x23,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, m0, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, s103, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x67,0x04,0x02,0x00] 
+0x05,0x00,0x23,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, s1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x23,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, 0 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x23,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, -1 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x23,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x23,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, m0 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, s103 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, s2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, v255 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x23,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x23,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, v255, v2 ; encoding: [0x05,0x00,0x23,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x23,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u16_u32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x23,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_cvt_pk_u8_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x26,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x26,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, s103, v2, v3 ; encoding: 
[0x05,0x00,0x26,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x26,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x26,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x26,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x26,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x26,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x26,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x26,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x26,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x26,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x26,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x26,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x26,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x26,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x26,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x26,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x26,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xae,0x01] 
+0x05,0x00,0x26,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x26,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x26,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_cvt_pk_u8_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x26,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_cvt_u32_f64_e32 v255, v[1:2] ; encoding: [0x01,0x2b,0xfe,0x7f] +0x01,0x2b,0xfe,0x7f + +# GFX11: v_cvt_u32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x2a,0x0a,0x7e] +0xf0,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, 0 ; encoding: [0x80,0x2a,0x0a,0x7e] +0x80,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, 0x3f717273 ; encoding: [0xff,0x2a,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x2a,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_cvt_u32_f64_e32 v5, 0xaf123456 ; encoding: [0xff,0x2a,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x2a,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_u32_f64_e32 v5, -1 ; encoding: [0xc1,0x2a,0x0a,0x7e] +0xc1,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, -4.0 ; encoding: [0xf7,0x2a,0x0a,0x7e] +0xf7,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, exec ; encoding: [0x7e,0x2a,0x0a,0x7e] +0x7e,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, s[102:103] ; encoding: [0x66,0x2a,0x0a,0x7e] +0x66,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x2a,0x0a,0x7e] +0x02,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, s[4:5] ; encoding: [0x04,0x2a,0x0a,0x7e] +0x04,0x2a,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x2b,0x0a,0x7e] +0x01,0x2b,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x2b,0x0a,0x7e] +0xfe,0x2b,0x0a,0x7e + +# GFX11: v_cvt_u32_f64_e32 v5, vcc ; encoding: [0x6a,0x2a,0x0a,0x7e] +0x6a,0x2a,0x0a,0x7e + +# GFX11: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x54,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x54,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x54,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x54,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x54,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_div_fixup_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x27,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x27,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0xc1,0x04,0x0e,0x04] 
+0x05,0x00,0x27,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x27,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x27,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x27,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x27,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x27,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_div_fixup_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x27,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_div_fixup_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x27,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_div_fixup_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x27,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_div_fixup_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x27,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: 
v_div_fixup_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x27,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_div_fixup_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x27,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x27,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_div_fixup_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_div_fixup_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_div_fixup_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_div_fixup_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_div_fixup_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_div_fixup_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_div_fixup_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x27,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_div_fixup_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_div_fixup_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x27,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x27,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[254:255], v[1:2], v[2:3], v[3:4] ; encoding: [0xfe,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], 0.5, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], 0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], -1, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], -4.0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], exec, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], s[102:103], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x66,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x66,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], s[2:3], v[2:3], v[3:4] ; encoding: 
[0x05,0x00,0x28,0xd6,0x02,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x02,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], s[4:5], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x04,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x04,0x04,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], 0.5, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x28,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], 0, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x28,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], -1, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x28,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], -4.0, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x28,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], exec, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x28,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], s[102:103], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xcd,0x0c,0x04] +0x05,0x00,0x28,0xd6,0x01,0xcd,0x0c,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], s[4:5], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x09,0x0c,0x04] +0x05,0x00,0x28,0xd6,0x01,0x09,0x0c,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], s[6:7], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x0d,0x0c,0x04] +0x05,0x00,0x28,0xd6,0x01,0x0d,0x0c,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], 0.5 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x28,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], 0 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x28,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], -1 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x28,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], -4.0 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x28,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], exec ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x28,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], s[102:103] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x9a,0x01] +0x05,0x00,0x28,0xd6,0x01,0x05,0x9a,0x01 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], s[6:7] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x00,0x28,0xd6,0x01,0x05,0x1a,0x00 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], s[8:9] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x22,0x00] +0x05,0x00,0x28,0xd6,0x01,0x05,0x22,0x00 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[254:255] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x00,0x28,0xd6,0x01,0x05,0xfa,0x07 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] clamp ; encoding: [0x05,0x80,0x28,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x28,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] div:2 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], -v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], -v[2:3], v[3:4] ; encoding: 
[0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_div_fixup_f64 v[5:6], -v[1:2], -v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:2 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:4 ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], vcc ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x28,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[254:255], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xfd,0x0f,0x04] +0x05,0x00,0x28,0xd6,0x01,0xfd,0x0f,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], vcc, v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x28,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], v[254:255], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0xfe,0x05,0x0e,0x04] +0x05,0x00,0x28,0xd6,0xfe,0x05,0x0e,0x04 + +# GFX11: v_div_fixup_f64 v[5:6], vcc, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x37,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x37,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x37,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x37,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x37,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x37,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x37,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_div_fmas_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x37,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_div_fmas_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x37,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_div_fmas_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x37,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_div_fmas_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x37,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x37,0xd6,0x01,0x05,0xfe,0x07 + 
+# GFX11: v_div_fmas_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x37,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x37,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_div_fmas_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_div_fmas_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_div_fmas_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_div_fmas_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_div_fmas_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_div_fmas_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x37,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_div_fmas_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x37,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[254:255], v[1:2], v[2:3], v[3:4] ; encoding: [0xfe,0x00,0x38,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x00,0x38,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], 0.5, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x38,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], 0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x38,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], -1, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x38,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], -4.0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x38,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0.5, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x38,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x38,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], -1, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x38,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], -4.0, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x38,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], 0.5 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x38,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], 0 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x38,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], -1 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x38,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], -4.0 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x38,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[254:255] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x00,0x38,0xd6,0x01,0x05,0xfa,0x07 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 
v[2:3], v[3:4] clamp ; encoding: [0x05,0x80,0x38,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x38,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[3:4] div:2 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], -v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], -v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_div_fmas_f64 v[5:6], -v[1:2], -v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:2 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:4 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x38,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], v[254:255], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xfd,0x0f,0x04] +0x05,0x00,0x38,0xd6,0x01,0xfd,0x0f,0x04 + +# GFX11: v_div_fmas_f64 v[5:6], v[254:255], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xfe,0x05,0x0e,0x04] +0x05,0x00,0x38,0xd6,0xfe,0x05,0x0e,0x04 + +# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x66,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x00,0x66,0xd6,0x01,0x05,0x0e,0x04 + +# op_sel[1:0] are ignored +# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x78,0x66,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_dot2_f16_f16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x60,0x66,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_dot2_f16_f16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] ; encoding: [0x00,0x65,0x66,0xd6,0x01,0x05,0x0e,0xc4] +0x00,0x65,0x66,0xd6,0x01,0x05,0x0e,0xc4 + +# GFX11: v_dot2_f16_f16 v5, -v255, v255, |s3| ; encoding: [0x05,0x04,0x66,0xd6,0xff,0xff,0x0f,0x20] +0x05,0x04,0x66,0xd6,0xff,0xff,0x0f,0x20 + +# GFX11: v_dot2_f16_f16 v5, -|s1|, -|s2|, v255 ; encoding: [0x05,0x03,0x66,0xd6,0x01,0x04,0xfc,0x67] +0x05,0x03,0x66,0xd6,0x01,0x04,0xfc,0x67 + +# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x67,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x00,0x67,0xd6,0x01,0x05,0x0e,0x04 + +# op_sel[1:0] are ignored +# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x78,0x67,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_dot2_bf16_bf16 v0, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04] +0x00,0x60,0x67,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_dot2_bf16_bf16 v0, |v1|, -v2, -|v3| op_sel:[0,0,1,1] ; encoding: [0x00,0x65,0x67,0xd6,0x01,0x05,0x0e,0xc4] +0x00,0x65,0x67,0xd6,0x01,0x05,0x0e,0xc4 + +# GFX11: v_dot2_bf16_bf16 v5, -v255, v255, |s3| ; encoding: [0x05,0x04,0x67,0xd6,0xff,0xff,0x0f,0x20] +0x05,0x04,0x67,0xd6,0xff,0xff,0x0f,0x20 + +# GFX11: v_dot2_bf16_bf16 v5, -|s1|, -|s2|, v255 ; encoding: [0x05,0x03,0x67,0xd6,0x01,0x04,0xfc,0x67] +0x05,0x03,0x67,0xd6,0x01,0x04,0xfc,0x67 + +# GFX11: 
v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] +0x01,0x05,0x0a,0x04 + +# GFX11: v_dot4_i32_iu8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x1a,0x1c] +0x03,0x40,0x16,0xcc,0x04,0x0b,0x1a,0x1c + +# GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 neg_lo:[1,1,0] ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x7a] +0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x7a + +# GFX11: v_dot4_i32_iu8 v3, v4, v5, 15 ; encoding: [0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x1a] +0x03,0x40,0x16,0xcc,0x04,0x0b,0x3e,0x1a + +# GFX11: v_dot4_u32_u8 v3, v4, v5, v6 ; encoding: [0x03,0x40,0x17,0xcc,0x04,0x0b,0x1a,0x1c] +0x03,0x40,0x17,0xcc,0x04,0x0b,0x1a,0x1c + +# GFX11: v_dot8_i32_iu4 v3, v4, v5, 15 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x3e,0x3a] +0x03,0x40,0x18,0xcc,0x04,0x0b,0x3e,0x3a + +# GFX11: v_dot8_i32_iu4 v3, v4, v5, v0 ; encoding: [0x03,0x40,0x18,0xcc,0x04,0x0b,0x02,0x1c] +0x03,0x40,0x18,0xcc,0x04,0x0b,0x02,0x1c + +# GFX11: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c + +# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf] +0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf + +# W32: v_dual_cndmask_b32 v20, v21, v22 :: v_dual_mov_b32 v41, v42 ; encoding: [0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14] +0x15,0x2d,0x50,0xca,0x2a,0x01,0x28,0x14 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_and_b32 v247, v160, v98 ; encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_fmamk_f32 v3, v6, 0x402f6c8b, v1 ; encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 :: v_dual_add_f32 v5, 0xaf123456, v2 ; encoding: [0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf] +0x03,0x03,0x48,0xc8,0xff,0x04,0x04,0x06,0x56,0x34,0x12,0xaf + +# W32: v_dual_fmac_f32 v0, v1, v2 :: v_dual_fmamk_f32 v3, v6, 0x3f700000, v1 ; encoding: [0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f] +0x01,0x05,0x04,0xc8,0x06,0x03,0x02,0x00,0x00,0x00,0x70,0x3f + +# W32: v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde] +0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde + +# W32: v_dual_fmamk_f32 v122, 0xff, 0xff, v161 :: v_dual_fmamk_f32 v123, 0xff, 0xff, v162 ; encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00] +0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xff,0x00,0x00,0x00 + +# W32: v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, v160, v99 ; encoding: [0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0] +0x4a,0x43,0xa3,0xc8,0xa0,0xc7,0xf6,0x7a,0x23,0x29,0x17,0xa0 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 0x402f6c8b ; encoding: [0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x51,0xc8,0xff,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b :: v_dual_mov_b32 v247, 2 ; encoding: [0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] +0x4a,0x42,0x51,0xc8,0x82,0x00,0xf6,0x7a,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_min_f32 v0, v1, v2 :: v_dual_max_f32 
v3, v4, v5 ; encoding: [0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00] +0x01,0x05,0xd4,0xca,0x04,0x0b,0x02,0x00 + +# W32: v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 0x402f6c8b ; encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40] +0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40 + +# W32: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v3 ; encoding: [0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x01,0x04,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# W32: v_dual_mul_dx9_zero_f32 v11, 0x24681357, v2 :: v_dual_dot2acc_f32_f16 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0xff,0x04,0xd8,0xc9,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 ; encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00] +0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00 + +# W32: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 ; encoding: [0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00] +0x00,0x05,0xc6,0xc8,0x01,0x07,0x00,0x00 + +# W32: v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] +0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24 + +# W32: v_dual_subrev_f32 v0, v1, v2 :: v_dual_add_nc_u32 v3, v4, v5 ; encoding: [0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00] +0x01,0x05,0xa0,0xc9,0x04,0x0b,0x02,0x00 + +# GFX11: v_exp_f32_e32 v255, v1 ; encoding: [0x01,0x4b,0xfe,0x7f] +0x01,0x4b,0xfe,0x7f + +# GFX11: v_exp_f32_e32 v5, 0.5 ; encoding: [0xf0,0x4a,0x0a,0x7e] +0xf0,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, 0 ; encoding: [0x80,0x4a,0x0a,0x7e] +0x80,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x4a,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x4a,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_exp_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x4a,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x4a,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_exp_f32_e32 v5, -1 ; encoding: [0xc1,0x4a,0x0a,0x7e] +0xc1,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, -4.0 ; encoding: [0xf7,0x4a,0x0a,0x7e] +0xf7,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, exec_hi ; encoding: [0x7f,0x4a,0x0a,0x7e] +0x7f,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, exec_lo ; encoding: [0x7e,0x4a,0x0a,0x7e] +0x7e,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, m0 ; encoding: [0x7d,0x4a,0x0a,0x7e] +0x7d,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, s103 ; encoding: [0x67,0x4a,0x0a,0x7e] +0x67,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, s1 ; encoding: [0x01,0x4a,0x0a,0x7e] +0x01,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, v1 ; encoding: [0x01,0x4b,0x0a,0x7e] +0x01,0x4b,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, v255 ; encoding: [0xff,0x4b,0x0a,0x7e] +0xff,0x4b,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x4a,0x0a,0x7e] +0x6b,0x4a,0x0a,0x7e + +# GFX11: v_exp_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x4a,0x0a,0x7e] +0x6a,0x4a,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v255, v1 ; encoding: [0x01,0x49,0xfe,0x7f] +0x01,0x49,0xfe,0x7f + +# GFX11: v_floor_f32_e32 v5, 0.5 ; encoding: [0xf0,0x48,0x0a,0x7e] +0xf0,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, 0 ; encoding: [0x80,0x48,0x0a,0x7e] +0x80,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, 0x3f717273 ; encoding: 
[0xff,0x48,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x48,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_floor_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x48,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x48,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f32_e32 v5, -1 ; encoding: [0xc1,0x48,0x0a,0x7e] +0xc1,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, -4.0 ; encoding: [0xf7,0x48,0x0a,0x7e] +0xf7,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, exec_hi ; encoding: [0x7f,0x48,0x0a,0x7e] +0x7f,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, exec_lo ; encoding: [0x7e,0x48,0x0a,0x7e] +0x7e,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, m0 ; encoding: [0x7d,0x48,0x0a,0x7e] +0x7d,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, s103 ; encoding: [0x67,0x48,0x0a,0x7e] +0x67,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, s1 ; encoding: [0x01,0x48,0x0a,0x7e] +0x01,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, v1 ; encoding: [0x01,0x49,0x0a,0x7e] +0x01,0x49,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, v255 ; encoding: [0xff,0x49,0x0a,0x7e] +0xff,0x49,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x48,0x0a,0x7e] +0x6b,0x48,0x0a,0x7e + +# GFX11: v_floor_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x48,0x0a,0x7e] +0x6a,0x48,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x35,0xfc,0x7f] +0x01,0x35,0xfc,0x7f + +# GFX11: v_floor_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x34,0x0a,0x7e] +0xf0,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], 0 ; encoding: [0x80,0x34,0x0a,0x7e] +0x80,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x34,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x34,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_floor_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x34,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x34,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x34,0x0a,0x7e] +0xc1,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x34,0x0a,0x7e] +0xf7,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], exec ; encoding: [0x7e,0x34,0x0a,0x7e] +0x7e,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x34,0x0a,0x7e] +0x66,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x34,0x0a,0x7e] +0x02,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x34,0x0a,0x7e] +0x04,0x34,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x35,0x0a,0x7e] +0x01,0x35,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x35,0x0a,0x7e] +0xfe,0x35,0x0a,0x7e + +# GFX11: v_floor_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x34,0x0a,0x7e] +0x6a,0x34,0x0a,0x7e + +# GFX11: v_fmaak_f32 v255, v1, v2, 0x11213141 ; encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11] +0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f32 v5, 0.5, v2, 0x11213141 ; encoding: [0xf0,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xf0,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f16 v5, 0.5, v2, 0x1121 ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, 0, v2, 0x11213141 ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f16 v5, 0, v2, 0x1121 ; encoding: [0x80,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0x80,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, -1, v2, 0x11213141 ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: 
v_fmaak_f16 v5, -1, v2, 0x1121 ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, -4.0, v2, 0x11213141 ; encoding: [0xf7,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xf7,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f16 v5, -4.0, v2, 0x1121 ; encoding: [0xf7,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0xf7,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, v1, v2, 0x11213141 ; encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11] +0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f16 v5, v1, v2, 0x1121 ; encoding: [0x01,0x05,0x0a,0x70,0x21,0x11,0x00,0x00] +0x01,0x05,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, v1, v2, 0xa1b1c1d1 ; encoding: [0x01,0x05,0x0a,0x5a,0xd1,0xc1,0xb1,0xa1] +0x01,0x05,0x0a,0x5a,0xd1,0xc1,0xb1,0xa1 + +# GFX11: v_fmaak_f16 v5, v1, v2, 0xa1b1 ; encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00] +0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00 + +# GFX11: v_fmaak_f32 v5, v1, v255, 0x11213141 ; encoding: [0x01,0xff,0x0b,0x5a,0x41,0x31,0x21,0x11] +0x01,0xff,0x0b,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f32 v5, v255, v2, 0x11213141 ; encoding: [0xff,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xff,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11 + +# GFX11: v_fmaak_f16 v5, v255, v2, 0x1121 ; encoding: [0xff,0x05,0x0a,0x70,0x21,0x11,0x00,0x00] +0xff,0x05,0x0a,0x70,0x21,0x11,0x00,0x00 + +# GFX11: v_fmac_f16_e32 v5, 0x1234, v2 ; encoding: [0xff,0x04,0x0a,0x6c,0x34,0x12,0x00,0x00] +0xff,0x04,0x0a,0x6c,0x34,0x12,0x00,0x00 + +# GFX11: v_fmac_f16_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x6c] +0x01,0x05,0x00,0x6c + +# GFX11: v_fmac_f16_e64 v0, |v1|, -v2 ; encoding: [0x00,0x01,0x36,0xd5,0x01,0x05,0x02,0x40] +0x00,0x01,0x36,0xd5,0x01,0x05,0x02,0x40 + +# GFX11: v_fmac_f16_e64 v0, s1, 2.0 ; encoding: [0x00,0x00,0x36,0xd5,0x01,0xe8,0x01,0x00] +0x00,0x00,0x36,0xd5,0x01,0xe8,0x01,0x00 + +# GFX11: v_fmac_f32_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x56] +0x01,0x05,0x00,0x56 + +# GFX11: v_fmac_f32_e64 v0, |v1|, -v2 ; encoding: [0x00,0x01,0x2b,0xd5,0x01,0x05,0x02,0x40] +0x00,0x01,0x2b,0xd5,0x01,0x05,0x02,0x40 + +# GFX11: v_fmac_f32_e64 v0, s1, 2.0 ; encoding: [0x00,0x00,0x2b,0xd5,0x01,0xe8,0x01,0x00] +0x00,0x00,0x2b,0xd5,0x01,0xe8,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x0c] +0x01,0x05,0x00,0x0c + +# GFX11: v_fmac_dx9_zero_f32_e64 v0, |v1|, -v2 ; encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40] +0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40 + +# GFX11: v_fmac_dx9_zero_f32_e64 v0, s1, 2.0 ; encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00] +0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00 + +# GFX11: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x48,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x48,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_fma_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x13,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x13,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0xc1,0x04,0x0e,0x04] 
+0x05,0x00,0x13,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x13,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_fma_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x13,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_fma_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x13,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_fma_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x13,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_fma_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x13,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_fma_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x13,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_fma_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x13,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_fma_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x13,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_fma_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x13,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_fma_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_fma_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_fma_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_fma_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_fma_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x13,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_fma_f32 v5, v1, v2, 
v3 clamp ; encoding: [0x05,0x80,0x13,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x13,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_fma_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_fma_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_fma_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_fma_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_fma_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_fma_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_fma_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_fma_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_fma_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_fma_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x13,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_fma_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x13,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_fma_f32 v5, v1, v2, null ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0xf2,0x01] +0x05,0x00,0x13,0xd6,0x01,0x05,0xf2,0x01 + +# GFX11: v_fma_f32 v5, v1, null, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xf9,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x01,0xf9,0x0c,0x04 + +# GFX11: v_fma_f32_e64_dpp v80, v81, v82, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] +0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa + +# GFX11: v_fma_f32_e64_dpp v80, v81, |v82|, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] +0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa + +# GFX11: v_fma_f32_e64_dpp v93, |v94|, v95, v94 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe] +0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe + +# GFX11: v_fma_dx9_zero_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, -1, v2, v3 ; encoding: 
[0x05,0x00,0x09,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x09,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x09,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x09,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x09,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x09,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x09,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x09,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x09,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x09,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, 
s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x09,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x09,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x09,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_fma_dx9_zero_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x09,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x09,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_fma_dx9_zero_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x09,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[254:255], v[1:2], v[2:3], v[3:4] ; encoding: [0xfe,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], 0.5, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], 0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], -1, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], -4.0, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], exec, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], s[102:103], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x66,0x04,0x0e,0x04] 
+0x05,0x00,0x14,0xd6,0x66,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], s[2:3], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x02,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x02,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], s[4:5], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x04,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x04,0x04,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], 0.5, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x14,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], 0, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x14,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], -1, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x14,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], -4.0, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x14,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], exec, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x14,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], s[102:103], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xcd,0x0c,0x04] +0x05,0x00,0x14,0xd6,0x01,0xcd,0x0c,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], s[4:5], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x09,0x0c,0x04] +0x05,0x00,0x14,0xd6,0x01,0x09,0x0c,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], s[6:7], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x0d,0x0c,0x04] +0x05,0x00,0x14,0xd6,0x01,0x0d,0x0c,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], 0.5 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x14,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], 0 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x14,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], -1 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x14,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], -4.0 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x14,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], exec ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x14,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], s[102:103] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x9a,0x01] +0x05,0x00,0x14,0xd6,0x01,0x05,0x9a,0x01 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], s[6:7] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x00,0x14,0xd6,0x01,0x05,0x1a,0x00 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], s[8:9] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x22,0x00] +0x05,0x00,0x14,0xd6,0x01,0x05,0x22,0x00 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[254:255] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x00,0x14,0xd6,0x01,0x05,0xfa,0x07 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] clamp ; encoding: [0x05,0x80,0x14,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x14,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] div:2 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], -v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_fma_f64 v[5:6], v[1:2], -v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x44] 
+0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_fma_f64 v[5:6], -v[1:2], -v[2:3], -v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:2 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] mul:4 ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], vcc ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x14,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_fma_f64 v[5:6], v[1:2], v[254:255], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xfd,0x0f,0x04] +0x05,0x00,0x14,0xd6,0x01,0xfd,0x0f,0x04 + +# GFX11: v_fma_f64 v[5:6], v[1:2], vcc, v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x14,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_fma_f64 v[5:6], v[254:255], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0xfe,0x05,0x0e,0x04] +0x05,0x00,0x14,0xd6,0xfe,0x05,0x0e,0x04 + +# GFX11: v_fma_f64 v[5:6], vcc, v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_fmamk_f32 v255, v1, 0x11213141, v3 ; encoding: [0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11] +0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f32 v5, 0, 0x11213141, v3 ; encoding: [0x80,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0x80,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, 0, 0x1121, v3 ; encoding: [0x80,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] +0x80,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, 0.5, 0x11213141, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0xf0,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, 0.5, 0x1121, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] +0xf0,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, -1, 0x11213141, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, -1, 0x1121, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] +0xc1,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, -4.0, 0x11213141, v3 ; encoding: [0xf7,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0xf7,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, -4.0, 0x1121, v3 ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] +0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, v1, 0x11213141, v255 ; encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11] +0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f32 v5, v1, 0x11213141, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x41,0x31,0x21,0x11] +0x01,0x07,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, v1, 0x1121, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00] +0x01,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, v1, 0xa1b1c1d1, v3 ; encoding: [0x01,0x07,0x0a,0x58,0xd1,0xc1,0xb1,0xa1] +0x01,0x07,0x0a,0x58,0xd1,0xc1,0xb1,0xa1 + +# GFX11: v_fmamk_f16 v5, v1, 0xa1b1, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0xb1,0xa1,0x00,0x00] +0x01,0x07,0x0a,0x6e,0xb1,0xa1,0x00,0x00 + +# GFX11: v_fmamk_f32 v5, v255, 0x11213141, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x41,0x31,0x21,0x11] +0xff,0x07,0x0a,0x58,0x41,0x31,0x21,0x11 + +# GFX11: v_fmamk_f16 v5, v255, 0x1121, v3 ; 
encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00] +0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00 + +# GFX11: v_fract_f32_e32 v255, v1 ; encoding: [0x01,0x41,0xfe,0x7f] +0x01,0x41,0xfe,0x7f + +# GFX11: v_fract_f32_e32 v5, 0.5 ; encoding: [0xf0,0x40,0x0a,0x7e] +0xf0,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, 0 ; encoding: [0x80,0x40,0x0a,0x7e] +0x80,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x40,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x40,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_fract_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x40,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x40,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f32_e32 v5, -1 ; encoding: [0xc1,0x40,0x0a,0x7e] +0xc1,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, -4.0 ; encoding: [0xf7,0x40,0x0a,0x7e] +0xf7,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, exec_hi ; encoding: [0x7f,0x40,0x0a,0x7e] +0x7f,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, exec_lo ; encoding: [0x7e,0x40,0x0a,0x7e] +0x7e,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, m0 ; encoding: [0x7d,0x40,0x0a,0x7e] +0x7d,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, s103 ; encoding: [0x67,0x40,0x0a,0x7e] +0x67,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, s1 ; encoding: [0x01,0x40,0x0a,0x7e] +0x01,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, v1 ; encoding: [0x01,0x41,0x0a,0x7e] +0x01,0x41,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, v255 ; encoding: [0xff,0x41,0x0a,0x7e] +0xff,0x41,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x40,0x0a,0x7e] +0x6b,0x40,0x0a,0x7e + +# GFX11: v_fract_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x40,0x0a,0x7e] +0x6a,0x40,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x7d,0xfc,0x7f] +0x01,0x7d,0xfc,0x7f + +# GFX11: v_fract_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x7c,0x0a,0x7e] +0xf0,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], 0 ; encoding: [0x80,0x7c,0x0a,0x7e] +0x80,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x7c,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x7c,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_fract_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x7c,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x7c,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x7c,0x0a,0x7e] +0xc1,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x7c,0x0a,0x7e] +0xf7,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], exec ; encoding: [0x7e,0x7c,0x0a,0x7e] +0x7e,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x7c,0x0a,0x7e] +0x66,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x7c,0x0a,0x7e] +0x02,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x7c,0x0a,0x7e] +0x04,0x7c,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x7d,0x0a,0x7e] +0x01,0x7d,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x7d,0x0a,0x7e] +0xfe,0x7d,0x0a,0x7e + +# GFX11: v_fract_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x7c,0x0a,0x7e] +0x6a,0x7c,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v255, v1 ; encoding: [0x01,0x7f,0xfe,0x7f] +0x01,0x7f,0xfe,0x7f + +# GFX11: v_frexp_exp_i32_f32_e32 v5, 0.5 ; encoding: [0xf0,0x7e,0x0a,0x7e] +0xf0,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, 0 ; encoding: [0x80,0x7e,0x0a,0x7e] +0x80,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x7e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x7e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# 
GFX11: v_frexp_exp_i32_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x7e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x7e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i32_f32_e32 v5, -1 ; encoding: [0xc1,0x7e,0x0a,0x7e] +0xc1,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, -4.0 ; encoding: [0xf7,0x7e,0x0a,0x7e] +0xf7,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, exec_hi ; encoding: [0x7f,0x7e,0x0a,0x7e] +0x7f,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, exec_lo ; encoding: [0x7e,0x7e,0x0a,0x7e] +0x7e,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, m0 ; encoding: [0x7d,0x7e,0x0a,0x7e] +0x7d,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, s103 ; encoding: [0x67,0x7e,0x0a,0x7e] +0x67,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, s1 ; encoding: [0x01,0x7e,0x0a,0x7e] +0x01,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, v1 ; encoding: [0x01,0x7f,0x0a,0x7e] +0x01,0x7f,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, v255 ; encoding: [0xff,0x7f,0x0a,0x7e] +0xff,0x7f,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x7e,0x0a,0x7e] +0x6b,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x7e,0x0a,0x7e] +0x6a,0x7e,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v255, v[1:2] ; encoding: [0x01,0x79,0xfe,0x7f] +0x01,0x79,0xfe,0x7f + +# GFX11: v_frexp_exp_i32_f64_e32 v5, 0.5 ; encoding: [0xf0,0x78,0x0a,0x7e] +0xf0,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, 0 ; encoding: [0x80,0x78,0x0a,0x7e] +0x80,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, 0x3f717273 ; encoding: [0xff,0x78,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x78,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_frexp_exp_i32_f64_e32 v5, 0xaf123456 ; encoding: [0xff,0x78,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x78,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i32_f64_e32 v5, -1 ; encoding: [0xc1,0x78,0x0a,0x7e] +0xc1,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, -4.0 ; encoding: [0xf7,0x78,0x0a,0x7e] +0xf7,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, exec ; encoding: [0x7e,0x78,0x0a,0x7e] +0x7e,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, s[102:103] ; encoding: [0x66,0x78,0x0a,0x7e] +0x66,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, s[2:3] ; encoding: [0x02,0x78,0x0a,0x7e] +0x02,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, s[4:5] ; encoding: [0x04,0x78,0x0a,0x7e] +0x04,0x78,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, v[1:2] ; encoding: [0x01,0x79,0x0a,0x7e] +0x01,0x79,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, v[254:255] ; encoding: [0xfe,0x79,0x0a,0x7e] +0xfe,0x79,0x0a,0x7e + +# GFX11: v_frexp_exp_i32_f64_e32 v5, vcc ; encoding: [0x6a,0x78,0x0a,0x7e] +0x6a,0x78,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v255, v1 ; encoding: [0x01,0x81,0xfe,0x7f] +0x01,0x81,0xfe,0x7f + +# GFX11: v_frexp_mant_f32_e32 v5, 0.5 ; encoding: [0xf0,0x80,0x0a,0x7e] +0xf0,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, 0 ; encoding: [0x80,0x80,0x0a,0x7e] +0x80,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x80,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x80,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_frexp_mant_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x80,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x80,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f32_e32 v5, -1 ; encoding: [0xc1,0x80,0x0a,0x7e] +0xc1,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, -4.0 ; encoding: [0xf7,0x80,0x0a,0x7e] +0xf7,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, 
exec_hi ; encoding: [0x7f,0x80,0x0a,0x7e] +0x7f,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, exec_lo ; encoding: [0x7e,0x80,0x0a,0x7e] +0x7e,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, m0 ; encoding: [0x7d,0x80,0x0a,0x7e] +0x7d,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, s103 ; encoding: [0x67,0x80,0x0a,0x7e] +0x67,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, s1 ; encoding: [0x01,0x80,0x0a,0x7e] +0x01,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, v1 ; encoding: [0x01,0x81,0x0a,0x7e] +0x01,0x81,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, v255 ; encoding: [0xff,0x81,0x0a,0x7e] +0xff,0x81,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x80,0x0a,0x7e] +0x6b,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x80,0x0a,0x7e] +0x6a,0x80,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x7b,0xfc,0x7f] +0x01,0x7b,0xfc,0x7f + +# GFX11: v_frexp_mant_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x7a,0x0a,0x7e] +0xf0,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], 0 ; encoding: [0x80,0x7a,0x0a,0x7e] +0x80,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x7a,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x7a,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_frexp_mant_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x7a,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x7a,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x7a,0x0a,0x7e] +0xc1,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x7a,0x0a,0x7e] +0xf7,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], exec ; encoding: [0x7e,0x7a,0x0a,0x7e] +0x7e,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x7a,0x0a,0x7e] +0x66,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x7a,0x0a,0x7e] +0x02,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x7a,0x0a,0x7e] +0x04,0x7a,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x7b,0x0a,0x7e] +0x01,0x7b,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x7b,0x0a,0x7e] +0xfe,0x7b,0x0a,0x7e + +# GFX11: v_frexp_mant_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x7a,0x0a,0x7e] +0x6a,0x7a,0x0a,0x7e + +# GFX11: v_ldexp_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x67,0x04,0x02,0x00 + +# 
GFX11: v_ldexp_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_ldexp_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_ldexp_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_ldexp_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_ldexp_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_ldexp_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x1c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x1c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_ldexp_f32 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_ldexp_f32 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_ldexp_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_ldexp_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_ldexp_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[254:255], v[1:2], v2 ; encoding: [0xfe,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], 0.5, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], 0, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], -1, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], -4.0, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf7,0x04,0x02,0x00] 
+0x05,0x00,0x2b,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], exec, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], s[102:103], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x66,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], s[2:3], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], s[4:5], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], exec_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], exec_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], m0 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s103 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 clamp ; encoding: [0x05,0x80,0x2b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x2b,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 div:2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], -v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 mul:2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 mul:4 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], vcc_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[1:2], vcc_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_ldexp_f64 v[5:6], v[254:255], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_ldexp_f64 v[5:6], vcc, v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_lerp_u8 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x15,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x15,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: 
v_lerp_u8 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x15,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_lerp_u8 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x15,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_lerp_u8 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x15,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_lerp_u8 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x15,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_lerp_u8 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x15,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_lerp_u8 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x15,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_lerp_u8 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x15,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_lerp_u8 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x15,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_lerp_u8 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x15,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_lerp_u8 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x15,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_lerp_u8 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x15,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_lerp_u8 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x15,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_lerp_u8 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x9e,0x01] 
+0x05,0x00,0x15,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_lerp_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_lerp_u8 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x15,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_lerp_u8 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x15,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_lerp_u8 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x15,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_lerp_u8 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_lerp_u8 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x15,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_lerp_u8 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x15,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_log_f32_e32 v255, v1 ; encoding: [0x01,0x4f,0xfe,0x7f] +0x01,0x4f,0xfe,0x7f + +# GFX11: v_log_f32_e32 v5, 0.5 ; encoding: [0xf0,0x4e,0x0a,0x7e] +0xf0,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, 0 ; encoding: [0x80,0x4e,0x0a,0x7e] +0x80,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x4e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x4e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_log_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x4e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x4e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_log_f32_e32 v5, -1 ; encoding: [0xc1,0x4e,0x0a,0x7e] +0xc1,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, -4.0 ; encoding: [0xf7,0x4e,0x0a,0x7e] +0xf7,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, exec_hi ; encoding: [0x7f,0x4e,0x0a,0x7e] +0x7f,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, exec_lo ; encoding: [0x7e,0x4e,0x0a,0x7e] +0x7e,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, m0 ; encoding: [0x7d,0x4e,0x0a,0x7e] +0x7d,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, s103 ; encoding: [0x67,0x4e,0x0a,0x7e] +0x67,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, s1 ; encoding: [0x01,0x4e,0x0a,0x7e] +0x01,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, v1 ; encoding: [0x01,0x4f,0x0a,0x7e] +0x01,0x4f,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, v255 ; encoding: [0xff,0x4f,0x0a,0x7e] +0xff,0x4f,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x4e,0x0a,0x7e] +0x6b,0x4e,0x0a,0x7e + +# GFX11: v_log_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x4e,0x0a,0x7e] +0x6a,0x4e,0x0a,0x7e + +# GFX11: v_lshl_add_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x46,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x46,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, -4.0, v2, v3 ; encoding: 
[0x05,0x00,0x46,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x46,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x46,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x46,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x46,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x46,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_lshl_add_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x46,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_lshl_add_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x46,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_lshl_add_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x46,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_lshl_add_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x46,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_lshl_add_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x46,0xd6,0x01,0x05,0xfe,0x07 + 
+# GFX11: v_lshl_add_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x46,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_lshl_add_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_lshl_add_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x46,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_lshl_add_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x46,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x56,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x56,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x56,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x56,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x56,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x56,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, s101, v3 ; encoding: 
[0x05,0x00,0x56,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x56,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_lshl_or_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x56,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_lshl_or_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x56,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_lshl_or_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x56,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_lshl_or_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x56,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_lshl_or_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x56,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_lshl_or_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x56,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_lshl_or_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_lshl_or_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x56,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x56,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_lshl_or_b32_e64_dpp v255, v5, v0, vcc_hi row_xmask:6 row_mask:0x0 bank_mask:0xf fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f] +0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f + +# GFX11: v_lshlrev_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x38,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x38,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x38,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, 
0xc400, v2 ; encoding: [0x05,0x00,0x38,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x38,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x38,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x38,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x38,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, s101 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x38,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x38,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_lshlrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x38,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x38,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x38,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[254:255], v1, v[2:3] ; encoding: [0xfe,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: 
v_lshlrev_b64 v[5:6], exec_hi, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], m0, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], s101, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], s1, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, 0.5 ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, 0 ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, -1 ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, -4.0 ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, exec ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, s[100:101] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xc9,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xc9,0x00,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, s[4:5] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, s[6:7] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, v[254:255] ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], v255, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x3c,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], vcc_hi, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_lshlrev_b64 v[5:6], vcc_lo, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x39,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x39,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x39,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x39,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x39,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x39,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x7f,0x04,0x02,0x00 + +# 
GFX11: v_lshrrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x39,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x39,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, s101 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x39,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x39,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_lshrrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x39,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x39,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x39,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[254:255], v1, v[2:3] ; encoding: [0xfe,0x00,0x3d,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x3d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], exec_hi, v[2:3] ; encoding: 
[0x05,0x00,0x3d,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], m0, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], s101, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], s1, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, 0.5 ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, 0 ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, -1 ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, -4.0 ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, exec ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, s[100:101] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xc9,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xc9,0x00,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, s[4:5] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, s[6:7] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, v[254:255] ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], v255, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x3d,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], vcc_hi, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_lshrrev_b64 v[5:6], vcc_lo, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x53,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x53,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00] +0x05,0x00,0x53,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00 + +# GFX11: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x53,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x53,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, 0x3800, v2, v3 ; encoding: 
[0x05,0x00,0x5a,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x5a,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_mad_i32_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x5a,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_mad_i32_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_mad_i32_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mad_i32_i16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mad_i32_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_mad_i32_i16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_mad_i32_i16 v5, v1, v2, v255 ; encoding: 
[0x05,0x00,0x5a,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_mad_i32_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x5a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x5a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x5a,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mad_i32_i16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x5a,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mad_i32_i16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0a,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0a,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: 
v_mad_i32_i24 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_mad_i32_i24 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mad_i32_i24 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mad_i32_i24 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_mad_i32_i24 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_mad_i32_i24 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_mad_i32_i24 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0a,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mad_i32_i24 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mad_i32_i24 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_mad_i32_i24 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0a,0xd6,0x6a,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[254:255], s12, v1, v2, v[3:4] ; encoding: [0xfe,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_i64_i32 v[254:255], s[12:13], v1, v2, v[3:4] ; encoding: [0xfe,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s100, v1, v2, v[3:4] ; encoding: [0x05,0x64,0xff,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[100:101], v1, v2, v[3:4] ; encoding: [0x05,0x64,0xff,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x64,0xff,0xd6,0x01,0x05,0x0e,0x04 + +# 
W32: v_mad_i64_i32 v[5:6], s12, 0.5, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xf0,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], 0.5, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0xf0,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, 0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x80,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], 0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x80,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, -1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xc1,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], -1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0xc1,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, -4.0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xf7,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], -4.0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0xf7,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, exec_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x7f,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, exec_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x7e,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, m0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], m0, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x7d,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, s101, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x65,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], s101, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x65,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, s1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], s1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x01,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, 0.5, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xe1,0x0d,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, 0.5, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xe1,0x0d,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, 0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x01,0x0d,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, 0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x0c,0xff,0xd6,0x01,0x01,0x0d,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, -1, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x83,0x0d,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, -1, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x0c,0xff,0xd6,0x01,0x83,0x0d,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, -4.0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xef,0x0d,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, -4.0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xef,0x0d,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, exec_hi, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xff,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, exec_hi, v[3:4] ; encoding: 
[0x05,0x0c,0xff,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xff,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, exec_lo, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xfd,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, exec_lo, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xfd,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, m0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xfb,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, m0, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xfb,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, s101, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xcb,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, s101, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xcb,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, s2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, s2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, 0.5 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xc2,0x03] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, 0.5 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x0c,0xff,0xd6,0x01,0x05,0xc2,0x03 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, 0 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x02,0x02] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, 0 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x02,0x02] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x02,0x02 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, -1 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x06,0x03] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, -1 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x06,0x03] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x06,0x03 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, -4.0 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xde,0x03] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, -4.0 ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xde,0x03] +0x05,0x0c,0xff,0xd6,0x01,0x05,0xde,0x03 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v255, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xff,0x0f,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v255, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xff,0x0f,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x01 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, s[100:101] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x92,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, s[100:101] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x92,0x01] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x92,0x01 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x1a,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x1a,0x00 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, s[8:9] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x22,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, s[8:9] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x22,0x00] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x22,0x00 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x07] +# W64: v_mad_i64_i32 v[5:6], s[12:13], 
v1, v2, v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x0c,0xff,0xd6,0x01,0x05,0xfa,0x07 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, v2, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xaa,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, v2, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x0c,0xff,0xd6,0x01,0x05,0xaa,0x01 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, vcc_hi, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xd7,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, vcc_hi, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xd7,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v1, vcc_lo, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xd5,0x0c,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, vcc_lo, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x0c,0xff,0xd6,0x01,0xd5,0x0c,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, v255, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0x05,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v255, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0xff,0x05,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, vcc_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x6b,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s12, vcc_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0x04,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x0c,0xff,0xd6,0x6a,0x04,0x0e,0x04 + +# W32: v_mad_i64_i32 v[5:6], s14, v1, v2, v[3:4] ; encoding: [0x05,0x0e,0xff,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_i64_i32 v[5:6], s[14:15], v1, v2, v[3:4] ; encoding: [0x05,0x0e,0xff,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x0e,0xff,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x41,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x41,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00] +0x05,0x00,0x41,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00 + +# GFX11: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x41,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x41,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x59,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_mad_u32_u16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x59,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_mad_u32_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: 
v_mad_u32_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x59,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x59,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x59,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_mad_u32_u16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x59,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mad_u32_u16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x59,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mad_u32_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x59,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_mad_u32_u16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x59,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x59,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x59,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x59,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x59,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_mad_u32_u16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x59,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_mad_u32_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x59,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x59,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xae,0x01] 
+0x05,0x00,0x59,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x59,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mad_u32_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x59,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mad_u32_u16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x59,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x0b,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x0b,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, v2, 0.5 ; encoding: 
[0x05,0x00,0x0b,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_mad_u32_u24 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mad_u32_u24 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mad_u32_u24 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_mad_u32_u24 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_mad_u32_u24 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_mad_u32_u24 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x0b,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mad_u32_u24 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mad_u32_u24 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_mad_u32_u24 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x0b,0xd6,0x6a,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[254:255], s12, v1, v2, v[3:4] ; encoding: [0xfe,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_u64_u32 v[254:255], s[12:13], v1, v2, v[3:4] ; encoding: [0xfe,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s100, v1, v2, v[3:4] ; encoding: [0x05,0x64,0xfe,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[100:101], v1, v2, v[3:4] ; encoding: [0x05,0x64,0xfe,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x64,0xfe,0xd6,0x01,0x05,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, 0.5, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xf0,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], 0.5, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0xf0,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, 0, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x80,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], 0, v2, v[3:4] ; encoding: 
[0x05,0x0c,0xfe,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x80,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, -1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xc1,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], -1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0xc1,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, -4.0, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xf7,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], -4.0, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0xf7,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, exec_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x7f,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, exec_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x7e,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, m0, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], m0, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x7d,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, s101, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x65,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], s101, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x65,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, s1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], s1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, 0.5, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xe1,0x0d,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, 0.5, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xe1,0x0d,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, 0, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x01,0x0d,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, 0, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0x01,0x0d,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, -1, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x83,0x0d,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, -1, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0x83,0x0d,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, -4.0, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xef,0x0d,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, -4.0, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xef,0x0d,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, exec_hi, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, exec_hi, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, exec_lo, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xfd,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, exec_lo, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xfd,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, m0, v[3:4] ; encoding: 
[0x05,0x0c,0xfe,0xd6,0x01,0xfb,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, m0, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xfb,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, s101, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xcb,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, s101, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xcb,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, s2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, s2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, 0.5 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xc2,0x03] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, 0.5 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0xc2,0x03 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, 0 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x02,0x02] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, 0 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x02,0x02] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x02,0x02 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, -1 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x06,0x03] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, -1 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x06,0x03] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x06,0x03 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, -4.0 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xde,0x03] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, -4.0 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xde,0x03] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0xde,0x03 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v255, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0f,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v255, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xff,0x0f,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x01 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, s[100:101] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x92,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, s[100:101] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x92,0x01] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x92,0x01 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x1a,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x1a,0x00 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, s[8:9] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x22,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, s[8:9] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x22,0x00] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x22,0x00 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x07] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0xfa,0x07 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, 
v2, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xaa,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, v2, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0xaa,0x01 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, vcc_hi, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xd7,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, vcc_hi, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xd7,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v1, vcc_lo, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xd5,0x0c,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, vcc_lo, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x0c,0xfe,0xd6,0x01,0xd5,0x0c,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, v255, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0x05,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v255, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0xff,0x05,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, vcc_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_hi, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x6b,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s12, vcc_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6a,0x04,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_lo, v2, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x0c,0xfe,0xd6,0x6a,0x04,0x0e,0x04 + +# W32: v_mad_u64_u32 v[5:6], s14, v1, v2, v[3:4] ; encoding: [0x05,0x0e,0xfe,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_mad_u64_u32 v[5:6], s[14:15], v1, v2, v[3:4] ; encoding: [0x05,0x0e,0xfe,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x0e,0xfe,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_max3_f16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4c,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_f16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4c,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_f16 v5, 
v1, -4.0, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_max3_f16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_max3_f16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_f16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_max3_f16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_f16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_f16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_f16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_f16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_f16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_max3_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_max3_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_max3_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_max3_f16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_f16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4c,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_f16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v1, vcc_lo, v3 ; encoding: 
[0x05,0x00,0x4c,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_f16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_f16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_f16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4c,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_max3_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1c,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1c,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_max3_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_max3_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xde,0x03] 
+0x05,0x00,0x1c,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_max3_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x1c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x1c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_max3_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_max3_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_max3_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_max3_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_max3_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_max3_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_max3_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1c,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1c,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max3_i16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_i16 v5, 0, v2, v3 ; 
encoding: [0x05,0x00,0x4d,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x4d,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_max3_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x4d,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_max3_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4d,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4d,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_i16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_i16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x03,0x00,0x38,0x00,0x00] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x03,0x00,0x38,0x00,0x00 + +# GFX11: v_max3_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00 + +# GFX11: v_max3_i16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_i16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_i16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_i16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_i16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_i16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_max3_i16 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_i16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4d,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_i16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_i16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4d,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_i16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_i16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_i16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_i16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4d,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_i32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_max3_i32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1d,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_i32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1d,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_i32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_max3_i32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, exec_lo, v3 ; encoding: 
[0x05,0x00,0x1d,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_max3_i32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_i32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_i32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_max3_i32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_i32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_i32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_i32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_i32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_i32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_i32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_i32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_i32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1d,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_i32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_i32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_i32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_i32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1d,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0x04,0x0e,0x04] 
+0x05,0x00,0x4e,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4e,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4e,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_u16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x03,0x00,0x38,0x00,0x00] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x03,0x00,0x38,0x00,0x00 + +# GFX11: v_max3_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x03,0x00,0xc4,0x00,0x00 + +# GFX11: v_max3_u16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_u16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_u16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_u16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_u16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4e,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_u16 v5, v1, v2, vcc_lo ; encoding: 
[0x05,0x00,0x4e,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4e,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_u16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4e,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_max3_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1e,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_max3_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1e,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_max3_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_max3_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_max3_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x02,0x02] 
+0x05,0x00,0x1e,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_max3_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_max3_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_max3_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_max3_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_max3_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_max3_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_max3_u32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_max3_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_max3_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_max3_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_max3_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1e,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_max3_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_max3_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_max3_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_max3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1e,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_max_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], exec, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], s[102:103], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x66,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], s[2:3], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], s[4:5], v[2:3] ; encoding: 
[0x05,0x00,0x2a,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], exec ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], s[102:103] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xcd,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xcd,0x00,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], s[4:5] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], s[6:7] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] clamp ; encoding: [0x05,0x80,0x2a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x2a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] div:2 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], -v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_max_f64 v[5:6], v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_max_f64 v[5:6], -v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] mul:2 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] mul:4 ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_max_f64 v[5:6], v[1:2], v[254:255] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_max_f64 v[5:6], v[1:2], vcc ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_max_f64 v[5:6], v[254:255], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x2a,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_max_f64 v[5:6], vcc, v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 div:2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x80,0x04,0x02,0x00] 
+0x05,0x00,0x0a,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_max_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_max_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_max_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_max_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_max_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x0a,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_max_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_max_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_max_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x65,0x04,0x02,0x00 + 
+# GFX11: v_max_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x09,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_max_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x09,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_max_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x09,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_max_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_max_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_max_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x09,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_max_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_max_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x20,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x20,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x20,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x20,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x20,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, 0 ; encoding: 
[0x05,0x00,0x20,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x20,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x20,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x20,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x20,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x20,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x20,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x20,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x20,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xe1,0x01,0x00] 
+0x05,0x00,0x1f,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x1f,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_med3_f16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, v1, 0.5, v3 ; encoding: 
[0x05,0x00,0x4f,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_med3_f16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4f,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_f16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4f,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_f16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_med3_f16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_med3_f16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_f16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_med3_f16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_f16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_f16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_f16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_f16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_f16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_med3_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_med3_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_med3_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_med3_f16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xae,0x01] 
+0x05,0x00,0x4f,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_f16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4f,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_f16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_f16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_f16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_f16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4f,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_med3_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1f,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1f,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_med3_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xc2,0x03 + 
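(Illustrative aside, not part of the patch: the added disassembler tests follow a fixed two-line pattern, a `# GFX11:` check comment, or a `# W32:`/`# W64:` pair for wave-size-dependent forms, giving the expected disassembly together with its `encoding:` byte list, immediately followed by the raw bytes that are fed to the disassembler. A minimal standalone sketch of that pairing, which cross-checks the byte list quoted in each check comment against the byte line that follows it, could look like the following; it assumes only the file layout shown above and takes the test-file path from the command line.)

```python
import re
import sys

# Matches check lines of the form:
#   # GFX11: <mnemonic ...> ; encoding: [0xAA,0xBB,...]
# (also # W32: / # W64: variants, which share the same byte line)
CHECK_RE = re.compile(
    r"^#\s*(?:GFX11|W32|W64):\s*(.+?)\s*;\s*encoding:\s*\[([^\]]+)\]")

def check_file(path):
    """Return the number of check comments whose quoted encoding does not
    match the raw byte line that follows them."""
    mismatches = 0
    pending = None  # (instruction text, encoding bytes quoted in the comment)
    for raw in open(path):
        line = raw.strip()
        m = CHECK_RE.match(line)
        if m:
            # Remember the expected bytes; a later W64 line for the same
            # instruction simply overwrites the identical W32 expectation.
            pending = (m.group(1), m.group(2).replace(" ", ""))
            continue
        if pending and line.startswith("0x"):
            insn, expected = pending
            if line.replace(" ", "") != expected:
                print(f"mismatch for '{insn}': {line} != {expected}")
                mismatches += 1
            pending = None
    return mismatches

if __name__ == "__main__":
    sys.exit(1 if check_file(sys.argv[1]) else 0)
```

(Such a consistency pass is only a sanity check on the test data itself; the actual verification in the tree is done by running the bytes through llvm-mc's disassembler and FileCheck, as the RUN lines of these test files specify.)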
+# GFX11: v_med3_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_med3_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_med3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x1f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x1f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_med3_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_med3_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_med3_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_med3_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_med3_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_med3_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_med3_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1f,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1f,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 
v255, v1, v2, v3 ; encoding: [0xff,0x00,0x50,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x50,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x50,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_med3_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x50,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_med3_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x50,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x50,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_i16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x50,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_i16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x50,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_i16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x50,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_i16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_i16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_i16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_i16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_i16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xfe,0x07] 
+0x05,0x00,0x50,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x50,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x50,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_i16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_i16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x50,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_i16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_i16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x50,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_i16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_i16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x50,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x20,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x20,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_i32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x20,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_med3_i32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x20,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_i32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x20,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_i32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x20,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_med3_i32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xfb,0x0c,0x04] 
+0x05,0x00,0x20,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x20,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_med3_i32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x20,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_i32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x20,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_i32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x20,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_med3_i32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x20,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_i32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_i32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_i32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_i32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_i32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x20,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_i32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_i32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_i32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x20,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_i32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_i32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x20,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_i32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_i32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x20,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x51,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x51,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x04,0x0e,0x04] 
+0x05,0x00,0x51,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x51,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_u16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xff,0x0d,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x51,0xd6,0x01,0xff,0x0d,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_med3_u16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x51,0xd6,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_med3_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x51,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x51,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_u16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x51,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_u16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x51,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_u16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_u16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_u16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x51,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_u16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x51,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v1, vcc_lo, v3 ; encoding: 
[0x05,0x00,0x51,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_u16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x51,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x51,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x21,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x21,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x21,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_med3_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x21,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_med3_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x21,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_med3_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x21,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_med3_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x21,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_med3_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x21,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_med3_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x21,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_med3_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xde,0x03] 
+0x05,0x00,0x21,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_med3_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x21,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_med3_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_med3_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_med3_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_med3_u32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_med3_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x21,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_med3_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_med3_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_med3_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x21,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_med3_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_med3_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x21,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_med3_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_med3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x21,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x49,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x49,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xe1,0x0d,0x04] 
+0x05,0x00,0x49,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_min3_f16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x49,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_f16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x49,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_f16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x49,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_min3_f16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x49,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_min3_f16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x49,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_f16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x49,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x49,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_min3_f16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x49,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_f16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_f16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_f16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_f16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_f16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x49,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_min3_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x49,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x49,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_min3_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_min3_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_min3_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_min3_f16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0xae,0x01 + +# 
GFX11: v_min3_f16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x49,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_f16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x49,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_f16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x49,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_f16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_f16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x49,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x19,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x19,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x19,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_min3_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x19,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x19,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x19,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_min3_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x19,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_min3_f32 v5, v1, v2, 0 ; encoding: 
[0x05,0x00,0x19,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x19,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x19,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x19,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_min3_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x19,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x19,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_min3_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x19,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x19,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_min3_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_min3_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_min3_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_min3_f32 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_min3_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_min3_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_min3_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_min3_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x19,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x19,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x19,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v255, v1, v2, v3 ; encoding: 
[0xff,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x4a,0xd6,0xff,0x04,0x0e,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_min3_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x4a,0xd6,0xff,0x04,0x0e,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_min3_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4a,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4a,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_i16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_i16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_i16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_i16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_i16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_i16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_i16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_i16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: 
v_min3_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_i16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_min3_i16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4a,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_i16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_i16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_i16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_i16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4a,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_i32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_min3_i32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1a,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_i32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1a,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_i32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_min3_i32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, s103, v3 ; 
encoding: [0x05,0x00,0x1a,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_min3_i32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_i32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_i32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_min3_i32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_i32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_i32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_i32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_i32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_i32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_min3_i32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_i32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_min3_i32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1a,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_i32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_i32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_i32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_i32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1a,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, m0, v2, v3 ; encoding: 
[0x05,0x00,0x4b,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x4b,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_u16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xff,0x0d,0x04,0x00,0x38,0x00,0x00] +0x05,0x00,0x4b,0xd6,0x01,0xff,0x0d,0x04,0x00,0x38,0x00,0x00 + +# GFX11: v_min3_u16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] +0x05,0x00,0x4b,0xd6,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00 + +# GFX11: v_min3_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x4b,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_u16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_u16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_u16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_u16 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_u16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_min3_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x4b,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x4b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_min3_u16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x4b,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: 
v_min3_u16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_u16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x4b,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_min3_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x1b,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_min3_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x1b,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_min3_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_min3_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_min3_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_min3_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_min3_u32 v5, v1, v2, -4.0 ; encoding: 
[0x05,0x00,0x1b,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_min3_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_min3_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_min3_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_min3_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_min3_u32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_min3_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_min3_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_min3_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_min3_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x1b,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_min3_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_min3_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_min3_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_min3_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x1b,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa] +0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa + +# GFX11: v_minmax_f32_e64_dpp v0, |v1|, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], exec, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], s[102:103], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x66,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], s[2:3], v[2:3] 
; encoding: [0x05,0x00,0x29,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], s[4:5], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x29,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x29,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x29,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x29,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], exec ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x29,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], s[102:103] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xcd,0x00,0x00] +0x05,0x00,0x29,0xd7,0x01,0xcd,0x00,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], s[4:5] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x29,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], s[6:7] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x29,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] clamp ; encoding: [0x05,0x80,0x29,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x29,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] div:2 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], -v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_min_f64 v[5:6], v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_min_f64 v[5:6], -v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] mul:2 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] mul:4 ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_min_f64 v[5:6], v[1:2], v[254:255] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x29,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_min_f64 v[5:6], v[1:2], vcc ; encoding: [0x05,0x00,0x29,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x29,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_min_f64 v[5:6], v[254:255], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x29,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_min_f64 v[5:6], vcc, v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x29,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_min_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7f,0x04,0x02,0x00] 
+0x05,0x00,0x0c,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_min_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_min_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_min_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_min_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x0c,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_min_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_min_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_min_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_min_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7f,0x04,0x02,0x00] 
+0x05,0x00,0x0b,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_min_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_min_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_min_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_min_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_min_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x0b,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_min_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_min_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_movreld_b32_e32 v255, v1 ; encoding: [0x01,0x85,0xfe,0x7f] +0x01,0x85,0xfe,0x7f + +# GFX11: v_movreld_b32_e32 v5, 0.5 ; encoding: [0xf0,0x84,0x0a,0x7e] +0xf0,0x84,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, 0 ; encoding: [0x80,0x84,0x0a,0x7e] +0x80,0x84,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, -1 ; encoding: [0xc1,0x84,0x0a,0x7e] +0xc1,0x84,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, -4.0 ; encoding: [0xf7,0x84,0x0a,0x7e] +0xf7,0x84,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, m0 ; encoding: [0x7d,0x84,0x0a,0x7e] +0x7d,0x84,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, v1 ; encoding: [0x01,0x85,0x0a,0x7e] +0x01,0x85,0x0a,0x7e + +# GFX11: v_movreld_b32_e32 v5, v255 ; encoding: [0xff,0x85,0x0a,0x7e] +0xff,0x85,0x0a,0x7e + +# GFX11: v_movrels_b32_e32 v255, v1 ; encoding: [0x01,0x87,0xfe,0x7f] +0x01,0x87,0xfe,0x7f + +# GFX11: v_movrels_b32_e32 v5, v1 ; encoding: [0x01,0x87,0x0a,0x7e] +0x01,0x87,0x0a,0x7e + +# GFX11: v_movrels_b32_e32 v5, v255 ; encoding: [0xff,0x87,0x0a,0x7e] +0xff,0x87,0x0a,0x7e + +# GFX11: v_movrelsd_2_b32_e32 v255, v1 ; encoding: [0x01,0x91,0xfe,0x7f] +0x01,0x91,0xfe,0x7f + +# GFX11: v_movrelsd_2_b32_e32 v5, v1 ; encoding: 
[0x01,0x91,0x0a,0x7e] +0x01,0x91,0x0a,0x7e + +# GFX11: v_movrelsd_2_b32_e32 v5, v255 ; encoding: [0xff,0x91,0x0a,0x7e] +0xff,0x91,0x0a,0x7e + +# GFX11: v_movrelsd_b32_e32 v255, v1 ; encoding: [0x01,0x89,0xfe,0x7f] +0x01,0x89,0xfe,0x7f + +# GFX11: v_movrelsd_b32_e32 v5, v1 ; encoding: [0x01,0x89,0x0a,0x7e] +0x01,0x89,0x0a,0x7e + +# GFX11: v_movrelsd_b32_e32 v5, v255 ; encoding: [0xff,0x89,0x0a,0x7e] +0xff,0x89,0x0a,0x7e + +# GFX11: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x3b,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x00,0x3b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], 0, v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], -1, v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], exec, v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[102:103], v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x66,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x66,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[2:3], v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x02,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x02,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[4:5], v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x04,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x04,0x04,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], 0, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x3b,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], -1, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x3b,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], exec_hi, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], exec_lo, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], m0, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s103, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, 0 ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, -1 ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, exec ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x3b,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, s[102:103] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x9a,0x01] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x9a,0x01 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, s[6:7] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x1a,0x00 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, s[8:9] ; encoding: 
[0x05,0x00,0x3b,0xd6,0x01,0x05,0x22,0x00] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x22,0x00 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[254:255] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x00,0x3b,0xd6,0x01,0x05,0xfa,0x07 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, vcc ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x3b,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], vcc_lo, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[254:255], v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0xfe,0x05,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0xfe,0x05,0x0e,0x04 + +# GFX11: v_mqsad_pk_u16_u8 v[5:6], vcc, v2, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x3b,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], 0, v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x80,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], -1, v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0xc1,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], exec, v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x7e,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], s[102:103], v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x66,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x66,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], s[2:3], v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x02,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x02,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], s[4:5], v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x04,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x04,0x04,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], 0, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0x01,0x0d,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], -1, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0x83,0x0d,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], exec_hi, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0xff,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], exec_lo, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0xfd,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], m0, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0xfb,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], s103, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0xcf,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], s2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0x05,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0x05,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], vcc_hi, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x01,0xd7,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], v[1:2], vcc_lo, v[3:6] ; encoding: 
[0xfc,0x00,0x3d,0xd6,0x01,0xd5,0x0c,0x04] +0xfc,0x00,0x3d,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mqsad_u32_u8 v[252:255], vcc, v2, v[3:6] ; encoding: [0xfc,0x00,0x3d,0xd6,0x6a,0x04,0x0e,0x04] +0xfc,0x00,0x3d,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x39,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x39,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_msad_u8 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x39,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_msad_u8 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x39,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_msad_u8 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x39,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_msad_u8 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x39,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_msad_u8 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x39,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_msad_u8 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x39,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_msad_u8 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x39,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_msad_u8 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x39,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_msad_u8 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x39,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_msad_u8 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xfe,0x01] 
+0x05,0x00,0x39,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_msad_u8 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x39,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_msad_u8 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x39,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_msad_u8 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x39,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_msad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_msad_u8 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x39,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_msad_u8 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_msad_u8 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x39,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_msad_u8 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x39,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_msad_u8 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_msad_u8 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x39,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_msad_u8 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_msad_u8 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x39,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_mul_f64 v[254:255], v[1:2], v[2:3] ; encoding: [0xfe,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], 0.5, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], 0, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], -1, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], -4.0, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], exec, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], s[102:103], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x66,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], s[2:3], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], s[4:5], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x28,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x28,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x28,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xef,0x01,0x00] 
+0x05,0x00,0x28,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], exec ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x28,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], s[102:103] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xcd,0x00,0x00] +0x05,0x00,0x28,0xd7,0x01,0xcd,0x00,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], s[4:5] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x09,0x00,0x00] +0x05,0x00,0x28,0xd7,0x01,0x09,0x00,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], s[6:7] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x0d,0x00,0x00] +0x05,0x00,0x28,0xd7,0x01,0x0d,0x00,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] clamp ; encoding: [0x05,0x80,0x28,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x28,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] div:2 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], -v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_mul_f64 v[5:6], v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_mul_f64 v[5:6], -v[1:2], -v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] mul:2 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] mul:4 ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_mul_f64 v[5:6], v[1:2], v[254:255] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xfd,0x03,0x00] +0x05,0x00,0x28,0xd7,0x01,0xfd,0x03,0x00 + +# GFX11: v_mul_f64 v[5:6], v[1:2], vcc ; encoding: [0x05,0x00,0x28,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x28,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mul_f64 v[5:6], v[254:255], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x28,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_mul_f64 v[5:6], vcc, v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x28,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v255, v1, v2 ; encoding: [0xff,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, 0, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, -1, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, m0, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, s103, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, s1, v2 ; 
encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, 0 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, -1 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, m0 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, s103 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, s2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, v255 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mul_hi_i32 v5, v255, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x2e,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mul_hi_i32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, 0, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, -1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, m0, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, s103, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, s1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x04,0x02,0x00 + +# 
GFX11: v_mul_hi_u32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, 0 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, -1 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, m0 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, s103 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, s2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, v255 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mul_hi_u32 v5, v255, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x2d,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mul_hi_u32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mullit_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x18,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x18,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, v1, 0.5, v3 ; encoding: 
[0x05,0x00,0x18,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x18,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_mullit_f32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x18,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_mullit_f32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x18,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_mullit_f32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x18,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_mullit_f32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x18,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_mullit_f32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x18,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_mullit_f32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x18,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_mullit_f32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x18,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_mullit_f32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x18,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_mullit_f32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_mullit_f32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_mullit_f32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_mullit_f32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_mullit_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_mullit_f32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x18,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_mullit_f32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x18,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x80,0x18,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, v1, v2, v3 div:2 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x1c] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x1c + +# GFX11: v_mullit_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x24] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x24 + +# GFX11: v_mullit_f32 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x44] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x44 + +# GFX11: v_mullit_f32 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x84] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x84 + +# GFX11: v_mullit_f32 v5, -v1, -v2, -v3 ; 
encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0xe4 + +# GFX11: v_mullit_f32 v5, v1, v2, v3 mul:2 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x0c + +# GFX11: v_mullit_f32 v5, v1, v2, v3 mul:4 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x14] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x14 + +# GFX11: v_mullit_f32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_mullit_f32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x18,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_mullit_f32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_mullit_f32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x18,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_mullit_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x18,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_mul_lo_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x05,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x05,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, exec_lo ; 
encoding: [0x05,0x00,0x05,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x05,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x05,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, 0, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, -1, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, m0, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, s103, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, s1, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, 0 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, -1 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xfd,0x00,0x00 + +# 
GFX11: v_mul_lo_u32 v5, v1, m0 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, s103 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, s2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, v255 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_mul_lo_u32 v5, v255, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x2c,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_mul_lo_u32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_not_b32_e32 v255, v1 ; encoding: [0x01,0x6f,0xfe,0x7f] +0x01,0x6f,0xfe,0x7f + +# GFX11: v_not_b32_e32 v5, 0.5 ; encoding: [0xf0,0x6e,0x0a,0x7e] +0xf0,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, 0 ; encoding: [0x80,0x6e,0x0a,0x7e] +0x80,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, 0x3f717273 ; encoding: [0xff,0x6e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x6e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_not_b32_e32 v5, 0xaf123456 ; encoding: [0xff,0x6e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x6e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_not_b32_e32 v5, -1 ; encoding: [0xc1,0x6e,0x0a,0x7e] +0xc1,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, -4.0 ; encoding: [0xf7,0x6e,0x0a,0x7e] +0xf7,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, exec_hi ; encoding: [0x7f,0x6e,0x0a,0x7e] +0x7f,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, exec_lo ; encoding: [0x7e,0x6e,0x0a,0x7e] +0x7e,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, m0 ; encoding: [0x7d,0x6e,0x0a,0x7e] +0x7d,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, s103 ; encoding: [0x67,0x6e,0x0a,0x7e] +0x67,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, s1 ; encoding: [0x01,0x6e,0x0a,0x7e] +0x01,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, v1 ; encoding: [0x01,0x6f,0x0a,0x7e] +0x01,0x6f,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, v255 ; encoding: [0xff,0x6f,0x0a,0x7e] +0xff,0x6f,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x6e,0x0a,0x7e] +0x6b,0x6e,0x0a,0x7e + +# GFX11: v_not_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x6e,0x0a,0x7e] +0x6a,0x6e,0x0a,0x7e + +# GFX11: v_or3_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x58,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x58,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_or3_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, 
exec_hi, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x58,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_or3_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x58,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_or3_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x58,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_or3_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x58,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_or3_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x58,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_or3_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x58,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_or3_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x58,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_or3_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x58,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_or3_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x58,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_or3_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x58,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_or3_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x58,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_or3_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x58,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_or3_b32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x58,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_or3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_or3_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x58,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_or3_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_or3_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xae,0x01] 
+0x05,0x00,0x58,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_or3_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x58,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_or3_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_or3_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x58,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_or3_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_or3_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x58,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_pack_b32_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x11,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x11,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x11,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x11,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x11,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x11,0xd7,0x01,0xff,0x03,0x00 + +# 
GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_pack_b32_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x40] +0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x40 + +# GFX11: v_pack_b32_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x60] +0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x60 + +# GFX11: v_pack_b32_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x11,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_pack_b32_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x11,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_pack_b32_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x11,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_perm_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x44,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x44,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_perm_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x44,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_perm_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x44,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_perm_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x44,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_perm_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x44,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_perm_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, s101, v3 ; encoding: 
[0x05,0x00,0x44,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x44,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_perm_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x44,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_perm_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x44,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_perm_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x44,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_perm_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x44,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_perm_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_perm_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_perm_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_perm_b32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_perm_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x44,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_perm_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_perm_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_perm_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x44,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_perm_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_perm_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x44,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_perm_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_perm_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x44,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_permlane16_b32 v255, v1, s2, s3 ; encoding: [0xff,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] +0xff,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v103, s2, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x67,0x05,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x67,0x05,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, 0.5, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xe1,0x0d,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xe1,0x0d,0x00 + +# GFX11: v_permlane16_b32 v5, v1, 0, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x01,0x0d,0x00] +0x05,0x00,0x5b,0xd6,0x01,0x01,0x0d,0x00 + +# GFX11: v_permlane16_b32 v5, v1, -1, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x0d,0x00] +0x05,0x00,0x5b,0xd6,0x01,0x83,0x0d,0x00 + +# GFX11: v_permlane16_b32 v5, v1, -4.0, s3 ; encoding: 
[0x05,0x00,0x5b,0xd6,0x01,0xef,0x0d,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xef,0x0d,0x00 + +# GFX11: v_permlane16_b32 v5, v1, exec_hi, s2 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x08,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xff,0x08,0x00 + +# GFX11: v_permlane16_b32 v5, v1, exec_hi, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xff,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, exec_lo, s2 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0x08,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xfd,0x08,0x00 + +# GFX11: v_permlane16_b32 v5, v1, exec_lo, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xfd,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, m0, s2 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x08,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xfb,0x08,0x00 + +# GFX11: v_permlane16_b32 v5, v1, m0, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xfb,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xcf,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xcf,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, s2, 0.5 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xc0,0x03] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xc0,0x03 + +# GFX11: v_permlane16_b32 v5, v1, s2, 0 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x00,0x02] +0x05,0x00,0x5b,0xd6,0x01,0x05,0x00,0x02 + +# GFX11: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x04,0x03] +0x05,0x00,0x5b,0xd6,0x01,0x05,0x04,0x03 + +# GFX11: v_permlane16_b32 v5, v1, s2, -4.0 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xdc,0x03] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xdc,0x03 + +# GFX11: v_permlane16_b32 v5, v1, s2, exec_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x01 + +# GFX11: v_permlane16_b32 v5, v1, s2, exec_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xf8,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xf8,0x01 + +# GFX11: v_permlane16_b32 v5, v1, s2, m0 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xf4,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xf4,0x01 + +# GFX11: v_permlane16_b32 v5, v1, s2, s103 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x9c,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0x9c,0x01 + +# GFX11: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, s2, vcc_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xac,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xac,0x01 + +# GFX11: v_permlane16_b32 v5, v1, s2, vcc_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xa8,0x01] +0x05,0x00,0x5b,0xd6,0x01,0x05,0xa8,0x01 + +# GFX11: v_permlane16_b32 v5, v1, vcc_hi, s2 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0x08,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xd7,0x08,0x00 + +# GFX11: v_permlane16_b32 v5, v1, vcc_hi, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xd7,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v1, vcc_lo, s2 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0x08,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xd5,0x08,0x00 + +# GFX11: v_permlane16_b32 v5, v1, vcc_lo, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0xd5,0x0c,0x00 + +# GFX11: v_permlane16_b32 v5, v255, s2, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0xff,0x05,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0xff,0x05,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v255, v1, s2, s3 ; encoding: [0xff,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] +0xff,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v103, s2, s3 ; encoding: 
[0x05,0x00,0x5c,0xd6,0x67,0x05,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x67,0x05,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, 0.5, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xe1,0x0d,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xe1,0x0d,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, 0, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x01,0x0d,0x00] +0x05,0x00,0x5c,0xd6,0x01,0x01,0x0d,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, -1, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x0d,0x00] +0x05,0x00,0x5c,0xd6,0x01,0x83,0x0d,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, -4.0, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xef,0x0d,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xef,0x0d,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, exec_hi, s2 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x08,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xff,0x08,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, exec_hi, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xff,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, exec_lo, s2 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0x08,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xfd,0x08,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, exec_lo, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xfd,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, m0, s2 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x08,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0x08,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, m0, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xcf,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xcf,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, s2, 0.5 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xc0,0x03] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xc0,0x03 + +# GFX11: v_permlanex16_b32 v5, v1, s2, 0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x00,0x02] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x00,0x02 + +# GFX11: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x04,0x03] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x04,0x03 + +# GFX11: v_permlanex16_b32 v5, v1, s2, -4.0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xdc,0x03] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xdc,0x03 + +# GFX11: v_permlanex16_b32 v5, v1, s2, exec_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, s2, exec_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xf8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xf8,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, s2, m0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xf4,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xf4,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, s2, s103 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x9c,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x9c,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, s2, vcc_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xac,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xac,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, s2, vcc_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xa8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0x05,0xa8,0x01 + +# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, s2 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0x08,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xd7,0x08,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xd7,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, s2 ; 
encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0x08,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xd5,0x08,0x00 + +# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0xd5,0x0c,0x00 + +# GFX11: v_permlanex16_b32 v5, v255, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0xff,0x05,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0xff,0x05,0x0c,0x00 + +# GFX11: v_pipeflush ; encoding: [0x00,0x36,0x00,0x7e] +0x00,0x36,0x00,0x7e + +# GFX11: v_pk_fmac_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] +0x01,0x05,0xfe,0x79 + +# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] +0xf0,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] +0x80,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] +0xc1,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] +0xf7,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] +0x7f,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] +0x7e,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] +0x7d,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78] +0x67,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] +0x01,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] +0x01,0xff,0x0b,0x78 + +# GFX11: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] +0x01,0x05,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] +0xff,0x05,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] +0x6b,0x04,0x0a,0x78 + +# GFX11: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] +0x6a,0x04,0x0a,0x78 + +# GFX11: v_qsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x3a,0xd6,0x01,0x05,0x0e,0x04] +0xfe,0x00,0x3a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], 0, v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], -1, v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], exec, v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[102:103], v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x66,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x66,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[2:3], v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x02,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x02,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[4:5], v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x04,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x04,0x04,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], 0, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x3a,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], -1, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x3a,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], exec_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], exec_lo, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: 
v_qsad_pk_u16_u8 v[5:6], v[1:2], m0, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s103, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, 0 ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, -1 ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, exec ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, s[102:103] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x9a,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x9a,0x01 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, s[6:7] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x1a,0x00] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x1a,0x00 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, s[8:9] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x22,0x00] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x22,0x00 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[254:255] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xfa,0x07] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xfa,0x07 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, vcc ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], vcc_lo, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[254:255], v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0xfe,0x05,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0xfe,0x05,0x0e,0x04 + +# GFX11: v_qsad_pk_u16_u8 v[5:6], vcc, v2, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x3a,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_rcp_f32_e32 v255, v1 ; encoding: [0x01,0x55,0xfe,0x7f] +0x01,0x55,0xfe,0x7f + +# GFX11: v_rcp_f32_e32 v5, 0.5 ; encoding: [0xf0,0x54,0x0a,0x7e] +0xf0,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, 0 ; encoding: [0x80,0x54,0x0a,0x7e] +0x80,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x54,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x54,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rcp_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x54,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x54,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_f32_e32 v5, -1 ; encoding: [0xc1,0x54,0x0a,0x7e] +0xc1,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, -4.0 ; encoding: [0xf7,0x54,0x0a,0x7e] +0xf7,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, exec_hi ; encoding: [0x7f,0x54,0x0a,0x7e] +0x7f,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, exec_lo ; encoding: [0x7e,0x54,0x0a,0x7e] +0x7e,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, m0 ; encoding: [0x7d,0x54,0x0a,0x7e] +0x7d,0x54,0x0a,0x7e + 
+# GFX11: v_rcp_f32_e32 v5, s103 ; encoding: [0x67,0x54,0x0a,0x7e] +0x67,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, s1 ; encoding: [0x01,0x54,0x0a,0x7e] +0x01,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, v1 ; encoding: [0x01,0x55,0x0a,0x7e] +0x01,0x55,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, v255 ; encoding: [0xff,0x55,0x0a,0x7e] +0xff,0x55,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x54,0x0a,0x7e] +0x6b,0x54,0x0a,0x7e + +# GFX11: v_rcp_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x54,0x0a,0x7e] +0x6a,0x54,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x5f,0xfc,0x7f] +0x01,0x5f,0xfc,0x7f + +# GFX11: v_rcp_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x5e,0x0a,0x7e] +0xf0,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], 0 ; encoding: [0x80,0x5e,0x0a,0x7e] +0x80,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x5e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x5e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rcp_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x5e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x5e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x5e,0x0a,0x7e] +0xc1,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x5e,0x0a,0x7e] +0xf7,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], exec ; encoding: [0x7e,0x5e,0x0a,0x7e] +0x7e,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x5e,0x0a,0x7e] +0x66,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x5e,0x0a,0x7e] +0x02,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x5e,0x0a,0x7e] +0x04,0x5e,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x5f,0x0a,0x7e] +0x01,0x5f,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x5f,0x0a,0x7e] +0xfe,0x5f,0x0a,0x7e + +# GFX11: v_rcp_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x5e,0x0a,0x7e] +0x6a,0x5e,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v255, v1 ; encoding: [0x01,0x57,0xfe,0x7f] +0x01,0x57,0xfe,0x7f + +# GFX11: v_rcp_iflag_f32_e32 v5, 0.5 ; encoding: [0xf0,0x56,0x0a,0x7e] +0xf0,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, 0 ; encoding: [0x80,0x56,0x0a,0x7e] +0x80,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x56,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x56,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rcp_iflag_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x56,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x56,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_iflag_f32_e32 v5, -1 ; encoding: [0xc1,0x56,0x0a,0x7e] +0xc1,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, -4.0 ; encoding: [0xf7,0x56,0x0a,0x7e] +0xf7,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, exec_hi ; encoding: [0x7f,0x56,0x0a,0x7e] +0x7f,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, exec_lo ; encoding: [0x7e,0x56,0x0a,0x7e] +0x7e,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, m0 ; encoding: [0x7d,0x56,0x0a,0x7e] +0x7d,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, s103 ; encoding: [0x67,0x56,0x0a,0x7e] +0x67,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, s1 ; encoding: [0x01,0x56,0x0a,0x7e] +0x01,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, v1 ; encoding: [0x01,0x57,0x0a,0x7e] +0x01,0x57,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, v255 ; encoding: [0xff,0x57,0x0a,0x7e] +0xff,0x57,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x56,0x0a,0x7e] +0x6b,0x56,0x0a,0x7e + +# GFX11: v_rcp_iflag_f32_e32 v5, vcc_lo ; encoding: 
[0x6a,0x56,0x0a,0x7e] +0x6a,0x56,0x0a,0x7e + +# GFX11: v_readfirstlane_b32 s103, v1 ; encoding: [0x01,0x05,0xce,0x7e] +0x01,0x05,0xce,0x7e + +# GFX11: v_readfirstlane_b32 s5, v1 ; encoding: [0x01,0x05,0x0a,0x7e] +0x01,0x05,0x0a,0x7e + +# GFX11: v_readfirstlane_b32 s5, v255 ; encoding: [0xff,0x05,0x0a,0x7e] +0xff,0x05,0x0a,0x7e + +# GFX11: v_readlane_b32 s101, v1, s2 ; encoding: [0x65,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] +0x65,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v1, 0 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x60,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_readlane_b32 s5, v1, m0 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v1, s101 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v1, s2 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v1, vcc_hi ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v1, vcc_lo ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_readlane_b32 s5, v255, s2 ; encoding: [0x05,0x00,0x60,0xd7,0xff,0x05,0x00,0x00] +0x05,0x00,0x60,0xd7,0xff,0x05,0x00,0x00 + +# GFX11: v_rndne_f32_e32 v255, v1 ; encoding: [0x01,0x47,0xfe,0x7f] +0x01,0x47,0xfe,0x7f + +# GFX11: v_rndne_f32_e32 v5, 0.5 ; encoding: [0xf0,0x46,0x0a,0x7e] +0xf0,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, 0 ; encoding: [0x80,0x46,0x0a,0x7e] +0x80,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x46,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x46,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rndne_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x46,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x46,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rndne_f32_e32 v5, -1 ; encoding: [0xc1,0x46,0x0a,0x7e] +0xc1,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, -4.0 ; encoding: [0xf7,0x46,0x0a,0x7e] +0xf7,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, exec_hi ; encoding: [0x7f,0x46,0x0a,0x7e] +0x7f,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, exec_lo ; encoding: [0x7e,0x46,0x0a,0x7e] +0x7e,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, m0 ; encoding: [0x7d,0x46,0x0a,0x7e] +0x7d,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, s103 ; encoding: [0x67,0x46,0x0a,0x7e] +0x67,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, s1 ; encoding: [0x01,0x46,0x0a,0x7e] +0x01,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, v1 ; encoding: [0x01,0x47,0x0a,0x7e] +0x01,0x47,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, v255 ; encoding: [0xff,0x47,0x0a,0x7e] +0xff,0x47,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x46,0x0a,0x7e] +0x6b,0x46,0x0a,0x7e + +# GFX11: v_rndne_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x46,0x0a,0x7e] +0x6a,0x46,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x33,0xfc,0x7f] +0x01,0x33,0xfc,0x7f + +# GFX11: v_rndne_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x32,0x0a,0x7e] +0xf0,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], 0 ; encoding: [0x80,0x32,0x0a,0x7e] +0x80,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x32,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x32,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rndne_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x32,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x32,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: 
v_rndne_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x32,0x0a,0x7e] +0xc1,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x32,0x0a,0x7e] +0xf7,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], exec ; encoding: [0x7e,0x32,0x0a,0x7e] +0x7e,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x32,0x0a,0x7e] +0x66,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x32,0x0a,0x7e] +0x02,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x32,0x0a,0x7e] +0x04,0x32,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x33,0x0a,0x7e] +0x01,0x33,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x33,0x0a,0x7e] +0xfe,0x33,0x0a,0x7e + +# GFX11: v_rndne_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x32,0x0a,0x7e] +0x6a,0x32,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v255, v1 ; encoding: [0x01,0x5d,0xfe,0x7f] +0x01,0x5d,0xfe,0x7f + +# GFX11: v_rsq_f32_e32 v5, 0.5 ; encoding: [0xf0,0x5c,0x0a,0x7e] +0xf0,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, 0 ; encoding: [0x80,0x5c,0x0a,0x7e] +0x80,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x5c,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x5c,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rsq_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x5c,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x5c,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f32_e32 v5, -1 ; encoding: [0xc1,0x5c,0x0a,0x7e] +0xc1,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, -4.0 ; encoding: [0xf7,0x5c,0x0a,0x7e] +0xf7,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, exec_hi ; encoding: [0x7f,0x5c,0x0a,0x7e] +0x7f,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, exec_lo ; encoding: [0x7e,0x5c,0x0a,0x7e] +0x7e,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, m0 ; encoding: [0x7d,0x5c,0x0a,0x7e] +0x7d,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, s103 ; encoding: [0x67,0x5c,0x0a,0x7e] +0x67,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, s1 ; encoding: [0x01,0x5c,0x0a,0x7e] +0x01,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, v1 ; encoding: [0x01,0x5d,0x0a,0x7e] +0x01,0x5d,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, v255 ; encoding: [0xff,0x5d,0x0a,0x7e] +0xff,0x5d,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x5c,0x0a,0x7e] +0x6b,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x5c,0x0a,0x7e] +0x6a,0x5c,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x63,0xfc,0x7f] +0x01,0x63,0xfc,0x7f + +# GFX11: v_rsq_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x62,0x0a,0x7e] +0xf0,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], 0 ; encoding: [0x80,0x62,0x0a,0x7e] +0x80,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x62,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x62,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_rsq_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x62,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x62,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x62,0x0a,0x7e] +0xc1,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x62,0x0a,0x7e] +0xf7,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], exec ; encoding: [0x7e,0x62,0x0a,0x7e] +0x7e,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x62,0x0a,0x7e] +0x66,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x62,0x0a,0x7e] +0x02,0x62,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x62,0x0a,0x7e] +0x04,0x62,0x0a,0x7e + +# GFX11: 
v_rsq_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x63,0x0a,0x7e] +0x01,0x63,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x63,0x0a,0x7e] +0xfe,0x63,0x0a,0x7e + +# GFX11: v_rsq_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x62,0x0a,0x7e] +0x6a,0x62,0x0a,0x7e + +# GFX11: v_sad_hi_u8 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x23,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x23,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x23,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x23,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x23,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x23,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x23,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_sad_hi_u8 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x23,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_sad_hi_u8 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x23,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_sad_hi_u8 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x23,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_sad_hi_u8 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x23,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_sad_hi_u8 v5, 
v1, v2, exec_hi ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_sad_hi_u8 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x23,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_sad_hi_u8 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x23,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_sad_hi_u8 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x23,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_sad_hi_u8 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x23,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_sad_hi_u8 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x23,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x23,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x24,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x24,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x24,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_sad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x24,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_sad_u16 v5, v1, -1, v3 ; encoding: 
[0x05,0x00,0x24,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x24,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_sad_u16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x24,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_sad_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x24,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_sad_u16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x24,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_sad_u16 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x24,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_sad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x24,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_sad_u16 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x24,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_sad_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_sad_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_sad_u16 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_sad_u16 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_sad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_sad_u16 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x24,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_sad_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_sad_u16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x24,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_sad_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x24,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_sad_u16 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x24,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_sad_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_sad_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x24,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x25,0xd6,0x01,0x05,0x0e,0x04] 
+0xff,0x00,0x25,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x25,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_sad_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x25,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_sad_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x25,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_sad_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x25,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_sad_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x25,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_sad_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x25,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_sad_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x25,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_sad_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x25,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_sad_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x25,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_sad_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_sad_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_sad_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_sad_u32 v5, v1, v2, s103 
; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_sad_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_sad_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x25,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_sad_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_sad_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x25,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_sad_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_sad_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x25,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_sad_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_sad_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x25,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x22,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x22,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u8 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x22,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_sad_u8 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x22,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_sad_u8 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x22,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_sad_u8 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x22,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_sad_u8 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xfd,0x0c,0x04] 
+0x05,0x00,0x22,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x22,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_sad_u8 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x22,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_sad_u8 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x22,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_sad_u8 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x22,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_sad_u8 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x22,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_sad_u8 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_sad_u8 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_sad_u8 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_sad_u8 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_sad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_sad_u8 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x22,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_sad_u8 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_sad_u8 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_sad_u8 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x22,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_sad_u8 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_sad_u8 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x22,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_sad_u8 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_sad_u8 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x22,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_sin_f32_e32 v255, v1 ; encoding: [0x01,0x6b,0xfe,0x7f] +0x01,0x6b,0xfe,0x7f + +# GFX11: v_sin_f32_e32 v5, 0.5 ; encoding: [0xf0,0x6a,0x0a,0x7e] +0xf0,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, 0 ; encoding: [0x80,0x6a,0x0a,0x7e] +0x80,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x6a,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x6a,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_sin_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x6a,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x6a,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_sin_f32_e32 v5, -1 ; encoding: 
[0xc1,0x6a,0x0a,0x7e] +0xc1,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, -4.0 ; encoding: [0xf7,0x6a,0x0a,0x7e] +0xf7,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, exec_hi ; encoding: [0x7f,0x6a,0x0a,0x7e] +0x7f,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, exec_lo ; encoding: [0x7e,0x6a,0x0a,0x7e] +0x7e,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, m0 ; encoding: [0x7d,0x6a,0x0a,0x7e] +0x7d,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, s103 ; encoding: [0x67,0x6a,0x0a,0x7e] +0x67,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, s1 ; encoding: [0x01,0x6a,0x0a,0x7e] +0x01,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, v1 ; encoding: [0x01,0x6b,0x0a,0x7e] +0x01,0x6b,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, v255 ; encoding: [0xff,0x6b,0x0a,0x7e] +0xff,0x6b,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x6a,0x0a,0x7e] +0x6b,0x6a,0x0a,0x7e + +# GFX11: v_sin_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x6a,0x0a,0x7e] +0x6a,0x6a,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v255, v1 ; encoding: [0x01,0x67,0xfe,0x7f] +0x01,0x67,0xfe,0x7f + +# GFX11: v_sqrt_f32_e32 v5, 0.5 ; encoding: [0xf0,0x66,0x0a,0x7e] +0xf0,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, 0 ; encoding: [0x80,0x66,0x0a,0x7e] +0x80,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, 0x3f717273 ; encoding: [0xff,0x66,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x66,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_sqrt_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x66,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x66,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f32_e32 v5, -1 ; encoding: [0xc1,0x66,0x0a,0x7e] +0xc1,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, -4.0 ; encoding: [0xf7,0x66,0x0a,0x7e] +0xf7,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, exec_hi ; encoding: [0x7f,0x66,0x0a,0x7e] +0x7f,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, exec_lo ; encoding: [0x7e,0x66,0x0a,0x7e] +0x7e,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, m0 ; encoding: [0x7d,0x66,0x0a,0x7e] +0x7d,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, s103 ; encoding: [0x67,0x66,0x0a,0x7e] +0x67,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, s1 ; encoding: [0x01,0x66,0x0a,0x7e] +0x01,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, v1 ; encoding: [0x01,0x67,0x0a,0x7e] +0x01,0x67,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, v255 ; encoding: [0xff,0x67,0x0a,0x7e] +0xff,0x67,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x66,0x0a,0x7e] +0x6b,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x66,0x0a,0x7e] +0x6a,0x66,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x69,0xfc,0x7f] +0x01,0x69,0xfc,0x7f + +# GFX11: v_sqrt_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x68,0x0a,0x7e] +0xf0,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], 0 ; encoding: [0x80,0x68,0x0a,0x7e] +0x80,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x68,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x68,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_sqrt_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x68,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x68,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x68,0x0a,0x7e] +0xc1,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x68,0x0a,0x7e] +0xf7,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], exec ; encoding: [0x7e,0x68,0x0a,0x7e] +0x7e,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x68,0x0a,0x7e] +0x66,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x68,0x0a,0x7e] +0x02,0x68,0x0a,0x7e + +# GFX11: 
v_sqrt_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x68,0x0a,0x7e] +0x04,0x68,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x69,0x0a,0x7e] +0x01,0x69,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x69,0x0a,0x7e] +0xfe,0x69,0x0a,0x7e + +# GFX11: v_sqrt_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x68,0x0a,0x7e] +0x6a,0x68,0x0a,0x7e + +# GFX11: v_sub_nc_i32_e64_dpp v93, v94, v95 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1] +0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1 + +# W32: v_sub_co_u32 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x01,0xd7,0x01,0x05,0x02,0x00] +# W64: v_sub_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x01,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x01,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0xf0,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x80,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x80,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0xc1,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0xf7,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x7f,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x7e,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7d,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x7d,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x67,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x67,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x01,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x01,0xd7,0x01,0xe1,0x01,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x01,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x01,0xd7,0x01,0x01,0x01,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x83,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x83,0x01,0x00] 
+0x05,0x00,0x01,0xd7,0x01,0x83,0x01,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xef,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x01,0xd7,0x01,0xef,0x01,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xff,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xff,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xfd,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xfb,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xfb,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xcf,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xcf,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x05,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0x05,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xff,0x03,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x01,0xd7,0x01,0xff,0x03,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x05,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x01,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xd7,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x01,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x01,0xd7,0x01,0xd5,0x00,0x00 + +# W32: v_sub_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xff,0x05,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x01,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x01,0xd7,0xff,0x05,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x6b,0x04,0x02,0x00 + +# W32: v_sub_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x01,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x01,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x04,0x02,0x00] 
+0x05,0x00,0x0e,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x0e,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x0e,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x0e,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_sub_nc_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x0e,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x0e,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v255, v1, v2 ; encoding: [0xff,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x25,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x25,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, 0, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, -1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x25,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x7e,0x04,0x02,0x00] 
+0x05,0x00,0x25,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, m0, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, s103, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x67,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, s1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x25,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, 0 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x25,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, -1 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x25,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, -4.0 clamp ; encoding: [0x05,0x80,0x25,0xd7,0x01,0xef,0x01,0x00] +0x05,0x80,0x25,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x25,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, m0 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, s103 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, s2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, v255 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x25,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x25,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x25,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_sub_nc_i32 v5, v255, v2 ; encoding: [0x05,0x00,0x25,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x25,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_sub_nc_i32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x25,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0x04,0x02,0x00] 
+0x05,0x00,0x04,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x65,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x65,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x04,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, 0xc400 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x80,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x04,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xcb,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xcb,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x04,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x04,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_sub_nc_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x04,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_sub_nc_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x02,0xd7,0x01,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x02,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x02,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0xf0,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x80,0x04,0x02,0x00] +# W64: 
v_subrev_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x80,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0xc1,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0xf7,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x7f,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x7e,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7d,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x7d,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x67,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x67,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x67,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x01,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x02,0xd7,0x01,0xe1,0x01,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x01,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x02,0xd7,0x01,0x01,0x01,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x83,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x02,0xd7,0x01,0x83,0x01,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xef,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x02,0xd7,0x01,0xef,0x01,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xff,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xff,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xfd,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xfb,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xfb,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xcf,0x00,0x00] +# W64: 
v_subrev_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xcf,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x05,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0x05,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xff,0x03,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x02,0xd7,0x01,0xff,0x03,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x02,0xd7,0x01,0x05,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xd7,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x02,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x02,0xd7,0x01,0xd5,0x00,0x00 + +# W32: v_subrev_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xff,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x02,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x02,0xd7,0xff,0x05,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x6b,0x04,0x02,0x00 + +# W32: v_subrev_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x02,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x02,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_swap_b32 v255, v1 ; encoding: [0x01,0xcb,0xfe,0x7f] +0x01,0xcb,0xfe,0x7f + +# GFX11: v_swap_b32 v5, v1 ; encoding: [0x01,0xcb,0x0a,0x7e] +0x01,0xcb,0x0a,0x7e + +# GFX11: v_swap_b32 v5, v255 ; encoding: [0xff,0xcb,0x0a,0x7e] +0xff,0xcb,0x0a,0x7e + +# GFX11: v_swaprel_b32 v255, v1 ; encoding: [0x01,0xd1,0xfe,0x7f] +0x01,0xd1,0xfe,0x7f + +# GFX11: v_swaprel_b32 v5, v1 ; encoding: [0x01,0xd1,0x0a,0x7e] +0x01,0xd1,0x0a,0x7e + +# GFX11: v_swaprel_b32 v5, v255 ; encoding: [0xff,0xd1,0x0a,0x7e] +0xff,0xd1,0x0a,0x7e + +# GFX11: v_trig_preop_f64 v[254:255], v[1:2], v2 ; encoding: [0xfe,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] +0xfe,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], 0.5, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0xf0,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], 0, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], -1, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], -4.0, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0xf7,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], exec, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], s[102:103], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x66,0x04,0x02,0x00] 
+0x05,0x00,0x2f,0xd7,0x66,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], s[2:3], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x02,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x02,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], s[4:5], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x04,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x04,0x04,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], 0.5 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xe1,0x01,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], 0 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x2f,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], -1 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x2f,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], -4.0 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xef,0x01,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], exec_hi ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], exec_lo ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], m0 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], s103 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xcf,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xcf,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 clamp ; encoding: [0x05,0x80,0x2f,0xd7,0x01,0x05,0x02,0x00] +0x05,0x80,0x2f,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 div:2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x18] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x18 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], -v[1:2], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x20] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x20 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 mul:2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x08] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x08 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 mul:4 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x10] +0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x10 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], vcc_hi ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[1:2], vcc_lo ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x2f,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], v[254:255], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0xfe,0x05,0x02,0x00] +0x05,0x00,0x2f,0xd7,0xfe,0x05,0x02,0x00 + +# GFX11: v_trig_preop_f64 v[5:6], vcc, v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x2f,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_trunc_f32_e32 v255, v1 ; encoding: [0x01,0x43,0xfe,0x7f] +0x01,0x43,0xfe,0x7f + +# GFX11: v_trunc_f32_e32 v5, 0.5 ; encoding: [0xf0,0x42,0x0a,0x7e] +0xf0,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, 0 ; encoding: [0x80,0x42,0x0a,0x7e] +0x80,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, 0x3f717273 ; 
encoding: [0xff,0x42,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x42,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_trunc_f32_e32 v5, 0xaf123456 ; encoding: [0xff,0x42,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x42,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f32_e32 v5, -1 ; encoding: [0xc1,0x42,0x0a,0x7e] +0xc1,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, -4.0 ; encoding: [0xf7,0x42,0x0a,0x7e] +0xf7,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, exec_hi ; encoding: [0x7f,0x42,0x0a,0x7e] +0x7f,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, exec_lo ; encoding: [0x7e,0x42,0x0a,0x7e] +0x7e,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, m0 ; encoding: [0x7d,0x42,0x0a,0x7e] +0x7d,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, s103 ; encoding: [0x67,0x42,0x0a,0x7e] +0x67,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, s1 ; encoding: [0x01,0x42,0x0a,0x7e] +0x01,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, v1 ; encoding: [0x01,0x43,0x0a,0x7e] +0x01,0x43,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, v255 ; encoding: [0xff,0x43,0x0a,0x7e] +0xff,0x43,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x42,0x0a,0x7e] +0x6b,0x42,0x0a,0x7e + +# GFX11: v_trunc_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x42,0x0a,0x7e] +0x6a,0x42,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[254:255], v[1:2] ; encoding: [0x01,0x2f,0xfc,0x7f] +0x01,0x2f,0xfc,0x7f + +# GFX11: v_trunc_f64_e32 v[5:6], 0.5 ; encoding: [0xf0,0x2e,0x0a,0x7e] +0xf0,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], 0 ; encoding: [0x80,0x2e,0x0a,0x7e] +0x80,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], 0x3f717273 ; encoding: [0xff,0x2e,0x0a,0x7e,0x73,0x72,0x71,0x3f] +0xff,0x2e,0x0a,0x7e,0x73,0x72,0x71,0x3f + +# GFX11: v_trunc_f64_e32 v[5:6], 0xaf123456 ; encoding: [0xff,0x2e,0x0a,0x7e,0x56,0x34,0x12,0xaf] +0xff,0x2e,0x0a,0x7e,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f64_e32 v[5:6], -1 ; encoding: [0xc1,0x2e,0x0a,0x7e] +0xc1,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], -4.0 ; encoding: [0xf7,0x2e,0x0a,0x7e] +0xf7,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], exec ; encoding: [0x7e,0x2e,0x0a,0x7e] +0x7e,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], s[102:103] ; encoding: [0x66,0x2e,0x0a,0x7e] +0x66,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], s[2:3] ; encoding: [0x02,0x2e,0x0a,0x7e] +0x02,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], s[4:5] ; encoding: [0x04,0x2e,0x0a,0x7e] +0x04,0x2e,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], v[1:2] ; encoding: [0x01,0x2f,0x0a,0x7e] +0x01,0x2f,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], v[254:255] ; encoding: [0xfe,0x2f,0x0a,0x7e] +0xfe,0x2f,0x0a,0x7e + +# GFX11: v_trunc_f64_e32 v[5:6], vcc ; encoding: [0x6a,0x2e,0x0a,0x7e] +0x6a,0x2e,0x0a,0x7e + +# GFX11: v_writelane_b32 v255, 0, s2 ; encoding: [0xff,0x00,0x61,0xd7,0x80,0x04,0x00,0x00] +0xff,0x00,0x61,0xd7,0x80,0x04,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, 0 ; encoding: [0x05,0x00,0x61,0xd7,0x80,0x00,0x01,0x00] +0x05,0x00,0x61,0xd7,0x80,0x00,0x01,0x00 + +# GFX11: v_writelane_b32 v5, 0.5, s2 ; encoding: [0x05,0x00,0x61,0xd7,0xf0,0x04,0x00,0x00] +0x05,0x00,0x61,0xd7,0xf0,0x04,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, m0 ; encoding: [0x05,0x00,0x61,0xd7,0x80,0xfa,0x00,0x00] +0x05,0x00,0x61,0xd7,0x80,0xfa,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, s101 ; encoding: [0x05,0x00,0x61,0xd7,0x80,0xca,0x00,0x00] +0x05,0x00,0x61,0xd7,0x80,0xca,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, s2 ; encoding: [0x05,0x00,0x61,0xd7,0x80,0x04,0x00,0x00] +0x05,0x00,0x61,0xd7,0x80,0x04,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, vcc_hi ; encoding: 
[0x05,0x00,0x61,0xd7,0x80,0xd6,0x00,0x00] +0x05,0x00,0x61,0xd7,0x80,0xd6,0x00,0x00 + +# GFX11: v_writelane_b32 v5, 0, vcc_lo ; encoding: [0x05,0x00,0x61,0xd7,0x80,0xd4,0x00,0x00] +0x05,0x00,0x61,0xd7,0x80,0xd4,0x00,0x00 + +# GFX11: v_writelane_b32 v5, -1, s2 ; encoding: [0x05,0x00,0x61,0xd7,0xc1,0x04,0x00,0x00] +0x05,0x00,0x61,0xd7,0xc1,0x04,0x00,0x00 + +# GFX11: v_writelane_b32 v5, -4.0, s2 ; encoding: [0x05,0x00,0x61,0xd7,0xf7,0x04,0x00,0x00] +0x05,0x00,0x61,0xd7,0xf7,0x04,0x00,0x00 + +# GFX11: v_xad_u32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x45,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x45,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_xad_u32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, s101, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x65,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x65,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x45,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: v_xad_u32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x45,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_xad_u32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x45,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_xad_u32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x45,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_xad_u32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, s101, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xcb,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xcb,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x45,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_xad_u32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x45,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_xad_u32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x45,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_xad_u32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xde,0x03] 
+0x05,0x00,0x45,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_xad_u32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x45,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_xad_u32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_xad_u32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_xad_u32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_xad_u32 v5, v1, v2, s101 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x96,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0x96,0x01 + +# GFX11: v_xad_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_xad_u32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x45,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_xad_u32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_xad_u32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_xad_u32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x45,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_xad_u32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x45,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_xad_u32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x45,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_xad_u32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_xad_u32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x45,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x45,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x40,0xd6,0x01,0x05,0x0e,0x04] +0xff,0x00,0x40,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0xf0,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x80,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x80,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, -1, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0xc1,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0xc1,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, -4.0, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0xf7,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0xf7,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, exec_hi, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x7f,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x7f,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, exec_lo, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x7e,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x7e,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, m0, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x7d,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x7d,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, s103, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x67,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x67,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, s1, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x01,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x40,0xd6,0x01,0xe1,0x0d,0x04 + +# GFX11: 
v_xor3_b32 v5, v1, 0, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x01,0x0d,0x04] +0x05,0x00,0x40,0xd6,0x01,0x01,0x0d,0x04 + +# GFX11: v_xor3_b32 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x83,0x0d,0x04] +0x05,0x00,0x40,0xd6,0x01,0x83,0x0d,0x04 + +# GFX11: v_xor3_b32 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xef,0x0d,0x04] +0x05,0x00,0x40,0xd6,0x01,0xef,0x0d,0x04 + +# GFX11: v_xor3_b32 v5, v1, exec_hi, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xff,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xff,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, exec_lo, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xfd,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xfd,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, m0, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xfb,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xfb,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, s103, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xcf,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xcf,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, s2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0x05,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xc2,0x03] +0x05,0x00,0x40,0xd6,0x01,0x05,0xc2,0x03 + +# GFX11: v_xor3_b32 v5, v1, v2, 0 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x02,0x02] +0x05,0x00,0x40,0xd6,0x01,0x05,0x02,0x02 + +# GFX11: v_xor3_b32 v5, v1, v2, -1 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x06,0x03] +0x05,0x00,0x40,0xd6,0x01,0x05,0x06,0x03 + +# GFX11: v_xor3_b32 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xde,0x03] +0x05,0x00,0x40,0xd6,0x01,0x05,0xde,0x03 + +# GFX11: v_xor3_b32 v5, v1, v255, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xff,0x0f,0x04] +0x05,0x00,0x40,0xd6,0x01,0xff,0x0f,0x04 + +# GFX11: v_xor3_b32 v5, v1, v2, exec_hi ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xfe,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0xfe,0x01 + +# GFX11: v_xor3_b32 v5, v1, v2, exec_lo ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xfa,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0xfa,0x01 + +# GFX11: v_xor3_b32 v5, v1, v2, m0 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xf6,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0xf6,0x01 + +# GFX11: v_xor3_b32 v5, v1, v2, s103 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x9e,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0x9e,0x01 + +# GFX11: v_xor3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_xor3_b32 v5, v1, v2, v255 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xfe,0x07] +0x05,0x00,0x40,0xd6,0x01,0x05,0xfe,0x07 + +# GFX11: v_xor3_b32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0xae,0x01 + +# GFX11: v_xor3_b32 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x40,0xd6,0x01,0x05,0xaa,0x01 + +# GFX11: v_xor3_b32 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xd7,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xd7,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0xd5,0x0c,0x04] +0x05,0x00,0x40,0xd6,0x01,0xd5,0x0c,0x04 + +# GFX11: v_xor3_b32 v5, v255, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0xff,0x05,0x0e,0x04] +0x05,0x00,0x40,0xd6,0xff,0x05,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x00,0x40,0xd6,0x6b,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x6b,0x04,0x0e,0x04 + +# GFX11: v_xor3_b32 v5, vcc_lo, v2, v3 ; encoding: 
[0x05,0x00,0x40,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x40,0xd6,0x6a,0x04,0x0e,0x04 + +# GFX11: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x0f,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x0f,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x02,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x02,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x0a,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x0a,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0xc0,0x0a,0xcc,0x02,0x07,0x02,0x18] +0x01,0xc0,0x0a,0xcc,0x02,0x07,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18] +0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x18] +0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x10] +0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x10 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x08] +0x01,0x50,0x0a,0xcc,0x02,0x07,0x02,0x08 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x18] +0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x10] +0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x10 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x08] +0x01,0x48,0x0a,0xcc,0x02,0x07,0x02,0x08 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18] +0x01,0x58,0x0a,0xcc,0x02,0x07,0x02,0x18 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00] +0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x00 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x10] +0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x10 + +# GFX11: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x08] +0x01,0x40,0x0a,0xcc,0x02,0x07,0x02,0x08 + +# GFX11: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x06,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x06,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x0e,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x0e,0xcc,0x01,0x05,0x0e,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0xc0,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0x44,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0x42,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0x41,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0x1c] +0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0x1c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x9c] +0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x9c + 
+# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x5c] +0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x5c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x3c] +0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0x3c + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0xfc] +0x08,0x40,0x0e,0xcc,0x00,0x01,0x04,0xfc + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0xfc] +0x08,0x47,0x0e,0xcc,0x00,0x01,0x04,0xfc + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x0e,0xcc,0x00,0x01,0x04,0x04] +0x08,0x60,0x0e,0xcc,0x00,0x01,0x04,0x04 + +# GFX11: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x0e,0xcc,0x00,0x01,0x04,0x04] +0x08,0x00,0x0e,0xcc,0x00,0x01,0x04,0x04 + +# GFX11: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x40,0x04,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x04,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x40,0x05,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x05,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x12,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x12,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x07,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x07,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x0c,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x0c,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x11,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x11,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x08,0xcc,0x01,0x05,0x02,0x18] +0x00,0xc0,0x08,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x08,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x08,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x0d,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x0d,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x10,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x10,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x01,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x01,0xcc,0x01,0x05,0x02,0x18 + +# GFX11: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x03,0xcc,0x01,0x05,0x02,0x18] +0x00,0x40,0x03,0xcc,0x01,0x05,0x02,0x18 + +# W32: v_cmp_class_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0xfc,0x7c] +0xf0,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0xfc,0x7c] +0x80,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0xfc,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_class_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0xfc,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xfc,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_class_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0xfc,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_class_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0xfc,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xfc,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_class_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0xfc,0x7c] +0xc1,0x04,0xfc,0x7c + +# W32: 
v_cmp_class_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0xfc,0x7c] +0xf7,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0xfc,0x7c] +0x7f,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0xfc,0x7c] +0x7e,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0xfc,0x7c] +0x7d,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0xfc,0x7c] +0x65,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0xfc,0x7c] +0x01,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0xfd,0x7c] +# W64: v_cmp_class_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0xfd,0x7c] +0x01,0xff,0xfd,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0xfc,0x7c] +0x01,0x05,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0xfc,0x7c] +0xff,0x05,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0xfc,0x7c] +0x6b,0x04,0xfc,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0xfc,0x7c] +0x6a,0x04,0xfc,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0xfe,0x7c] +0xf0,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0xfe,0x7c] +0x80,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0xfe,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_class_f64_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0xfe,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xfe,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_class_f64_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0xfe,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_class_f64_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0xfe,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xfe,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_class_f64_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0xfe,0x7c] +0xc1,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0xfe,0x7c] +0xf7,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, exec, v2 ; encoding: [0x7e,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, exec, v2 ; encoding: [0x7e,0x04,0xfe,0x7c] +0x7e,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, s[100:101], v2 ; encoding: [0x64,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, s[100:101], v2 ; encoding: [0x64,0x04,0xfe,0x7c] +0x64,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 
vcc_lo, s[2:3], v2 ; encoding: [0x02,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, s[2:3], v2 ; encoding: [0x02,0x04,0xfe,0x7c] +0x02,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, s[4:5], v2 ; encoding: [0x04,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, s[4:5], v2 ; encoding: [0x04,0x04,0xfe,0x7c] +0x04,0x04,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, v[1:2], v255 ; encoding: [0x01,0xff,0xff,0x7c] +# W64: v_cmp_class_f64_e32 vcc, v[1:2], v255 ; encoding: [0x01,0xff,0xff,0x7c] +0x01,0xff,0xff,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, v[1:2], v2 ; encoding: [0x01,0x05,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, v[1:2], v2 ; encoding: [0x01,0x05,0xfe,0x7c] +0x01,0x05,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, v[254:255], v2 ; encoding: [0xfe,0x05,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, v[254:255], v2 ; encoding: [0xfe,0x05,0xfe,0x7c] +0xfe,0x05,0xfe,0x7c + +# W32: v_cmp_class_f64_e32 vcc_lo, vcc, v2 ; encoding: [0x6a,0x04,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, vcc, v2 ; encoding: [0x6a,0x04,0xfe,0x7c] +0x6a,0x04,0xfe,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x04,0x7c] +0x80,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x04,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_eq_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x04,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x04,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_eq_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x04,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_eq_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x04,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x04,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_eq_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x04,0x7c] +0xc1,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x04,0x7c] +0x7f,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x04,0x7c] +0x7e,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x04,0x7c] +0x7d,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x04,0x7c] +0x65,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x04,0x7c] +0x01,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x04,0x7c] +0x01,0x05,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x04,0x7c] +0x6b,0x04,0x04,0x7c + +# W32: v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x04,0x7c] +0x6a,0x04,0x04,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x24,0x7c] +0xf0,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, 0, v2 ; encoding: 
[0x80,0x04,0x24,0x7c] +0x80,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x24,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x24,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x24,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x24,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x24,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x24,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x24,0x7c] +0xc1,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x24,0x7c] +0xf7,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x24,0x7c] +0x7f,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x24,0x7c] +0x7e,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x24,0x7c] +0x7d,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x24,0x7c] +0x65,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x24,0x7c] +0x01,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x25,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x25,0x7c] +0x01,0xff,0x25,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x24,0x7c] +0x01,0x05,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x24,0x7c] +0xff,0x05,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x24,0x7c] +0x6b,0x04,0x24,0x7c + +# W32: v_cmp_eq_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x24,0x7c] +0x6a,0x04,0x24,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x44,0x7c] +0xf0,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x44,0x7c] +0x80,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x44,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x44,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x44,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x44,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x44,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x44,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x44,0x7c] +# W64: 
v_cmp_eq_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x44,0x7c] +0xc1,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x44,0x7c] +0xf7,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x44,0x7c] +0x7e,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x44,0x7c] +0x64,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x44,0x7c] +0x02,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x44,0x7c] +0x04,0x04,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x44,0x7c] +0x01,0x05,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x45,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x45,0x7c] +0x01,0xfd,0x45,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x44,0x7c] +0xfe,0x05,0x44,0x7c + +# W32: v_cmp_eq_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x44,0x7c] +0x6a,0x04,0x44,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x64,0x7c] +0x80,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x64,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_eq_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x64,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x64,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_eq_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x64,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_eq_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x64,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x64,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_eq_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x64,0x7c] +0xc1,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x64,0x7c] +0x7f,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x64,0x7c] +0x7e,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x64,0x7c] +0x7d,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x64,0x7c] +0x65,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x64,0x7c] +0x01,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, v1, v2 ; encoding: 
[0x01,0x05,0x64,0x7c] +0x01,0x05,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x64,0x7c] +0x6b,0x04,0x64,0x7c + +# W32: v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x64,0x7c] +0x6a,0x04,0x64,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x84,0x7c] +0xf0,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x84,0x7c] +0x80,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x84,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x84,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x84,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x84,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x84,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x84,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x84,0x7c] +0xc1,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x84,0x7c] +0xf7,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x84,0x7c] +0x7f,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x84,0x7c] +0x7e,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x84,0x7c] +0x7d,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x84,0x7c] +0x65,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x84,0x7c] +0x01,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x85,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x85,0x7c] +0x01,0xff,0x85,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x84,0x7c] +0x01,0x05,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x84,0x7c] +0xff,0x05,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x84,0x7c] +0x6b,0x04,0x84,0x7c + +# W32: v_cmp_eq_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x84,0x7c] +0x6a,0x04,0x84,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa4,0x7c] +0xf0,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xa4,0x7c] +# W64: 
v_cmp_eq_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xa4,0x7c] +0x80,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa4,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa4,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xa4,0x7c] +0xc1,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa4,0x7c] +0xf7,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xa4,0x7c] +0x7e,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa4,0x7c] +0x64,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa4,0x7c] +0x02,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa4,0x7c] +0x04,0x04,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa4,0x7c] +0x01,0x05,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa5,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa5,0x7c] +0x01,0xfd,0xa5,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa4,0x7c] +0xfe,0x05,0xa4,0x7c + +# W32: v_cmp_eq_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa4,0x7c] +0x6a,0x04,0xa4,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x74,0x7c] +0x80,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x74,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_eq_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x74,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x74,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_eq_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x74,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_eq_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x74,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x74,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_eq_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x74,0x7c] +0xc1,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x74,0x7c] +0x7f,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, exec_lo, v2 ; encoding: 
[0x7e,0x04,0x74,0x7c] +0x7e,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x74,0x7c] +0x7d,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x74,0x7c] +0x65,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x74,0x7c] +0x01,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x74,0x7c] +0x01,0x05,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x74,0x7c] +0x6b,0x04,0x74,0x7c + +# W32: v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x74,0x7c] +0x6a,0x04,0x74,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x94,0x7c] +0xf0,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x94,0x7c] +0x80,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x94,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x94,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x94,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x94,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x94,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x94,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x94,0x7c] +0xc1,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x94,0x7c] +0xf7,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x94,0x7c] +0x7f,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x94,0x7c] +0x7e,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x94,0x7c] +0x7d,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x94,0x7c] +0x65,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x94,0x7c] +0x01,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x95,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x95,0x7c] +0x01,0xff,0x95,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x94,0x7c] +0x01,0x05,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, v255, 
v2 ; encoding: [0xff,0x05,0x94,0x7c] +0xff,0x05,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x94,0x7c] +0x6b,0x04,0x94,0x7c + +# W32: v_cmp_eq_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x94,0x7c] +0x6a,0x04,0x94,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb4,0x7c] +0xf0,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xb4,0x7c] +0x80,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_eq_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb4,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_eq_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_eq_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb4,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_eq_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xb4,0x7c] +0xc1,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb4,0x7c] +0xf7,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xb4,0x7c] +0x7e,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb4,0x7c] +0x64,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb4,0x7c] +0x02,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb4,0x7c] +0x04,0x04,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb4,0x7c] +0x01,0x05,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb5,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb5,0x7c] +0x01,0xfd,0xb5,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb4,0x7c] +0xfe,0x05,0xb4,0x7c + +# W32: v_cmp_eq_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb4,0x7c] +0x6a,0x04,0xb4,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x00,0x7c] +0x80,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x00,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_f_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x00,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x00,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_f_f16_e32 vcc_lo, 0xfe0b, v2 ; 
encoding: [0xff,0x04,0x00,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_f_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x00,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x00,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_f_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x00,0x7c] +0xc1,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x00,0x7c] +0x7f,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x00,0x7c] +0x7e,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x00,0x7c] +0x7d,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x00,0x7c] +0x65,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x00,0x7c] +0x01,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x00,0x7c] +0x01,0x05,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x00,0x7c] +0x6b,0x04,0x00,0x7c + +# W32: v_cmp_f_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x00,0x7c] +0x6a,0x04,0x00,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x20,0x7c] +0xf0,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x20,0x7c] +0x80,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x20,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x20,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x20,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x20,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x20,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x20,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x20,0x7c] +0xc1,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x20,0x7c] +0xf7,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x20,0x7c] +0x7f,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x20,0x7c] +0x7e,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x20,0x7c] +0x7d,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x20,0x7c] 
+0x65,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x20,0x7c] +0x01,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x21,0x7c] +# W64: v_cmp_f_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x21,0x7c] +0x01,0xff,0x21,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c] +0x01,0x05,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x20,0x7c] +0xff,0x05,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x20,0x7c] +0x6b,0x04,0x20,0x7c + +# W32: v_cmp_f_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x20,0x7c] +0x6a,0x04,0x20,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x40,0x7c] +0xf0,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x40,0x7c] +0x80,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x40,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x40,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x40,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x40,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x40,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x40,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x40,0x7c] +0xc1,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x40,0x7c] +0xf7,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x40,0x7c] +0x7e,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x40,0x7c] +0x64,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x40,0x7c] +0x02,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x40,0x7c] +0x04,0x04,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x40,0x7c] +0x01,0x05,0x40,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x41,0x7c] +# W64: v_cmp_f_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x41,0x7c] +0x01,0xfd,0x41,0x7c + +# W32: v_cmp_f_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x40,0x7c] +0xfe,0x05,0x40,0x7c + +# W32: 
v_cmp_f_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x40,0x7c] +0x6a,0x04,0x40,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x80,0x7c] +0xf0,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x80,0x7c] +0x80,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x80,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x80,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x80,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x80,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x80,0x7c] +0xc1,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x80,0x7c] +0xf7,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x80,0x7c] +0x7f,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x80,0x7c] +0x7e,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x80,0x7c] +0x7d,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x80,0x7c] +0x65,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x80,0x7c] +0x01,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x81,0x7c] +# W64: v_cmp_f_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x81,0x7c] +0x01,0xff,0x81,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x80,0x7c] +0x01,0x05,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x80,0x7c] +0xff,0x05,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x80,0x7c] +0x6b,0x04,0x80,0x7c + +# W32: v_cmp_f_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x80,0x7c] +0x6a,0x04,0x80,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa0,0x7c] +0xf0,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xa0,0x7c] +0x80,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa0,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: 
[0xff,0x04,0xa0,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa0,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa0,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa0,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa0,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xa0,0x7c] +0xc1,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa0,0x7c] +0xf7,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xa0,0x7c] +0x7e,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa0,0x7c] +0x64,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa0,0x7c] +0x02,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa0,0x7c] +0x04,0x04,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa0,0x7c] +0x01,0x05,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa1,0x7c] +# W64: v_cmp_f_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa1,0x7c] +0x01,0xfd,0xa1,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa0,0x7c] +0xfe,0x05,0xa0,0x7c + +# W32: v_cmp_f_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa0,0x7c] +0x6a,0x04,0xa0,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x90,0x7c] +0xf0,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x90,0x7c] +0x80,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x90,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x90,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x90,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x90,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x90,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x90,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x90,0x7c] +0xc1,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x90,0x7c] +0xf7,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x90,0x7c] +0x7f,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, exec_lo, v2 ; encoding: 
[0x7e,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x90,0x7c] +0x7e,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x90,0x7c] +0x7d,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x90,0x7c] +0x65,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x90,0x7c] +0x01,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x91,0x7c] +# W64: v_cmp_f_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x91,0x7c] +0x01,0xff,0x91,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x90,0x7c] +0x01,0x05,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x90,0x7c] +0xff,0x05,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x90,0x7c] +0x6b,0x04,0x90,0x7c + +# W32: v_cmp_f_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x90,0x7c] +0x6a,0x04,0x90,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb0,0x7c] +0xf0,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xb0,0x7c] +0x80,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_f_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb0,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_f_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_f_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb0,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_f_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xb0,0x7c] +0xc1,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb0,0x7c] +0xf7,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xb0,0x7c] +0x7e,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb0,0x7c] +0x64,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb0,0x7c] +0x02,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb0,0x7c] +0x04,0x04,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, v[1:2], v[2:3] ; encoding: 
[0x01,0x05,0xb0,0x7c] +0x01,0x05,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb1,0x7c] +# W64: v_cmp_f_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb1,0x7c] +0x01,0xfd,0xb1,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb0,0x7c] +0xfe,0x05,0xb0,0x7c + +# W32: v_cmp_f_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb0,0x7c] +0x6a,0x04,0xb0,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x0c,0x7c] +0x80,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x0c,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ge_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x0c,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x0c,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ge_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ge_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0c,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ge_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x0c,0x7c] +0xc1,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x0c,0x7c] +0x7f,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x0c,0x7c] +0x7e,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x0c,0x7c] +0x7d,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x0c,0x7c] +0x65,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x0c,0x7c] +0x01,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0c,0x7c] +0x01,0x05,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0c,0x7c] +0x6b,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0c,0x7c] +0x6a,0x04,0x0c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x2c,0x7c] +0xf0,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x2c,0x7c] +0x80,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x2c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ge_f32_e32 vcc, 0xaf123456, v2 ; encoding: 
[0xff,0x04,0x2c,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x2c,0x7c] +0xc1,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x2c,0x7c] +0xf7,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x2c,0x7c] +0x7f,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x2c,0x7c] +0x7e,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x2c,0x7c] +0x7d,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x2c,0x7c] +0x65,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x2c,0x7c] +0x01,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x2d,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x2d,0x7c] +0x01,0xff,0x2d,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2c,0x7c] +0x01,0x05,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x2c,0x7c] +0xff,0x05,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2c,0x7c] +0x6b,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2c,0x7c] +0x6a,0x04,0x2c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4c,0x7c] +0xf0,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x4c,0x7c] +0x80,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ge_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x4c,0x7c] +0xc1,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4c,0x7c] +0xf7,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x4c,0x7c] +0x7e,0x04,0x4c,0x7c + +# W32: 
v_cmp_ge_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4c,0x7c] +0x64,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4c,0x7c] +0x02,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4c,0x7c] +0x04,0x04,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4c,0x7c] +0x01,0x05,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4d,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4d,0x7c] +0x01,0xfd,0x4d,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4c,0x7c] +0xfe,0x05,0x4c,0x7c + +# W32: v_cmp_ge_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4c,0x7c] +0x6a,0x04,0x4c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x6c,0x7c] +0x80,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x6c,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ge_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x6c,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x6c,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ge_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ge_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x6c,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ge_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x6c,0x7c] +0xc1,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x6c,0x7c] +0x7f,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x6c,0x7c] +0x7e,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x6c,0x7c] +0x7d,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x6c,0x7c] +0x65,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x6c,0x7c] +0x01,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x6c,0x7c] +0x01,0x05,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x6c,0x7c] +0x6b,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x6c,0x7c] +0x6a,0x04,0x6c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, 0.5, v2 ; encoding: 
[0xf0,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x8c,0x7c] +0xf0,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x8c,0x7c] +0x80,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ge_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8c,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x8c,0x7c] +0xc1,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x8c,0x7c] +0xf7,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x8c,0x7c] +0x7f,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x8c,0x7c] +0x7e,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x8c,0x7c] +0x7d,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x8c,0x7c] +0x65,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x8c,0x7c] +0x01,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x8d,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x8d,0x7c] +0x01,0xff,0x8d,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x8c,0x7c] +0x01,0x05,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x8c,0x7c] +0xff,0x05,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8c,0x7c] +0x6b,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8c,0x7c] +0x6a,0x04,0x8c,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xac,0x7c] +0xf0,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xac,0x7c] +0x80,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xac,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xac,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xac,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xac,0x7c,0x56,0x34,0x12,0xaf] +# 
W64: v_cmp_ge_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xac,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xac,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xac,0x7c] +0xc1,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xac,0x7c] +0xf7,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xac,0x7c] +0x7e,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xac,0x7c] +0x64,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xac,0x7c] +0x02,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xac,0x7c] +0x04,0x04,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xac,0x7c] +0x01,0x05,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xad,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xad,0x7c] +0x01,0xfd,0xad,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xac,0x7c] +0xfe,0x05,0xac,0x7c + +# W32: v_cmp_ge_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xac,0x7c] +0x6a,0x04,0xac,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x7c,0x7c] +0x80,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x7c,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ge_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x7c,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x7c,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ge_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ge_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x7c,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ge_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x7c,0x7c] +0xc1,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x7c,0x7c] +0x7f,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x7c,0x7c] +0x7e,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x7c,0x7c] +0x7d,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x7c,0x7c] +0x65,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, s1, v2 ; encoding: 
[0x01,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x7c,0x7c] +0x01,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x7c,0x7c] +0x01,0x05,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x7c,0x7c] +0x6b,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x7c,0x7c] +0x6a,0x04,0x7c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x9c,0x7c] +0xf0,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x9c,0x7c] +0x80,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ge_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9c,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x9c,0x7c] +0xc1,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x9c,0x7c] +0xf7,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x9c,0x7c] +0x7f,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x9c,0x7c] +0x7e,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x9c,0x7c] +0x7d,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x9c,0x7c] +0x65,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x9c,0x7c] +0x01,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x9d,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x9d,0x7c] +0x01,0xff,0x9d,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9c,0x7c] +0x01,0x05,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x9c,0x7c] +0xff,0x05,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9c,0x7c] +0x6b,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9c,0x7c] +# W64: v_cmp_ge_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9c,0x7c] +0x6a,0x04,0x9c,0x7c + +# W32: v_cmp_ge_u64_e32 
vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbc,0x7c] +0xf0,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xbc,0x7c] +0x80,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ge_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xbc,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ge_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ge_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xbc,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ge_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xbc,0x7c] +0xc1,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbc,0x7c] +0xf7,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xbc,0x7c] +0x7e,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbc,0x7c] +0x64,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbc,0x7c] +0x02,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbc,0x7c] +0x04,0x04,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbc,0x7c] +0x01,0x05,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbd,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbd,0x7c] +0x01,0xfd,0xbd,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbc,0x7c] +0xfe,0x05,0xbc,0x7c + +# W32: v_cmp_ge_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xbc,0x7c] +0x6a,0x04,0xbc,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x08,0x7c] +0x80,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x08,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_gt_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x08,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x08,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_gt_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x08,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_gt_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x08,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x08,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_gt_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x08,0x7c] +0xc1,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, exec_hi, v2 ; encoding: 
[0x7f,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x08,0x7c] +0x7f,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x08,0x7c] +0x7e,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x08,0x7c] +0x7d,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x08,0x7c] +0x65,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x08,0x7c] +0x01,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x08,0x7c] +0x01,0x05,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x08,0x7c] +0x6b,0x04,0x08,0x7c + +# W32: v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x08,0x7c] +0x6a,0x04,0x08,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x28,0x7c] +0xf0,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x28,0x7c] +0x80,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x28,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x28,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x28,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x28,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x28,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x28,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x28,0x7c] +0xc1,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x28,0x7c] +0xf7,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x28,0x7c] +0x7f,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x28,0x7c] +0x7e,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x28,0x7c] +0x7d,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x28,0x7c] +0x65,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x28,0x7c] +0x01,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x29,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x29,0x7c] +0x01,0xff,0x29,0x7c + +# W32: v_cmp_gt_f32_e32 
vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x28,0x7c] +0x01,0x05,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x28,0x7c] +0xff,0x05,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x28,0x7c] +0x6b,0x04,0x28,0x7c + +# W32: v_cmp_gt_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x28,0x7c] +0x6a,0x04,0x28,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x48,0x7c] +0xf0,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x48,0x7c] +0x80,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x48,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x48,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x48,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x48,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x48,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x48,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x48,0x7c] +0xc1,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x48,0x7c] +0xf7,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x48,0x7c] +0x7e,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x48,0x7c] +0x64,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x48,0x7c] +0x02,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x48,0x7c] +0x04,0x04,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x48,0x7c] +0x01,0x05,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x49,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x49,0x7c] +0x01,0xfd,0x49,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x48,0x7c] +0xfe,0x05,0x48,0x7c + +# W32: v_cmp_gt_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x48,0x7c] +0x6a,0x04,0x48,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x68,0x7c] +0x80,0x04,0x68,0x7c + +# W32: 
v_cmp_gt_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x68,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_gt_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x68,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x68,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_gt_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x68,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_gt_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x68,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x68,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_gt_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x68,0x7c] +0xc1,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x68,0x7c] +0x7f,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x68,0x7c] +0x7e,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x68,0x7c] +0x7d,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x68,0x7c] +0x65,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x68,0x7c] +0x01,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x68,0x7c] +0x01,0x05,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x68,0x7c] +0x6b,0x04,0x68,0x7c + +# W32: v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x68,0x7c] +0x6a,0x04,0x68,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x88,0x7c] +0xf0,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x88,0x7c] +0x80,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x88,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x88,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x88,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x88,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x88,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x88,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x88,0x7c] +0xc1,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x88,0x7c] +0xf7,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x88,0x7c] +0x7f,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x88,0x7c] +0x7e,0x04,0x88,0x7c + +# W32: 
v_cmp_gt_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x88,0x7c] +0x7d,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x88,0x7c] +0x65,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x88,0x7c] +0x01,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x89,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x89,0x7c] +0x01,0xff,0x89,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x88,0x7c] +0x01,0x05,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x88,0x7c] +0xff,0x05,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x88,0x7c] +0x6b,0x04,0x88,0x7c + +# W32: v_cmp_gt_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x88,0x7c] +0x6a,0x04,0x88,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa8,0x7c] +0xf0,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xa8,0x7c] +0x80,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa8,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa8,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xa8,0x7c] +0xc1,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa8,0x7c] +0xf7,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xa8,0x7c] +0x7e,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa8,0x7c] +0x64,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa8,0x7c] +0x02,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa8,0x7c] +0x04,0x04,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa8,0x7c] +0x01,0x05,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, v[1:2], v[254:255] ; 
encoding: [0x01,0xfd,0xa9,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa9,0x7c] +0x01,0xfd,0xa9,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa8,0x7c] +0xfe,0x05,0xa8,0x7c + +# W32: v_cmp_gt_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa8,0x7c] +0x6a,0x04,0xa8,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x78,0x7c] +0x80,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x78,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_gt_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x78,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x78,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_gt_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x78,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_gt_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x78,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x78,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_gt_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x78,0x7c] +0xc1,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x78,0x7c] +0x7f,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x78,0x7c] +0x7e,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x78,0x7c] +0x7d,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x78,0x7c] +0x65,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x78,0x7c] +0x01,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x78,0x7c] +0x01,0x05,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x78,0x7c] +0x6b,0x04,0x78,0x7c + +# W32: v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x78,0x7c] +0x6a,0x04,0x78,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x98,0x7c] +0xf0,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x98,0x7c] +0x80,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x98,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x98,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x98,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x98,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x98,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x98,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_u32_e32 vcc_lo, 
-1, v2 ; encoding: [0xc1,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x98,0x7c] +0xc1,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x98,0x7c] +0xf7,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x98,0x7c] +0x7f,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x98,0x7c] +0x7e,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x98,0x7c] +0x7d,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x98,0x7c] +0x65,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x98,0x7c] +0x01,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x99,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x99,0x7c] +0x01,0xff,0x99,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x98,0x7c] +0x01,0x05,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x98,0x7c] +0xff,0x05,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x98,0x7c] +0x6b,0x04,0x98,0x7c + +# W32: v_cmp_gt_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x98,0x7c] +0x6a,0x04,0x98,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb8,0x7c] +0xf0,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xb8,0x7c] +0x80,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_gt_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb8,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_gt_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_gt_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb8,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_gt_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xb8,0x7c] +0xc1,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb8,0x7c] +0xf7,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xb8,0x7c] +0x7e,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, s[100:101], 
v[2:3] ; encoding: [0x64,0x04,0xb8,0x7c] +0x64,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb8,0x7c] +0x02,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb8,0x7c] +0x04,0x04,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb8,0x7c] +0x01,0x05,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb9,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb9,0x7c] +0x01,0xfd,0xb9,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb8,0x7c] +0xfe,0x05,0xb8,0x7c + +# W32: v_cmp_gt_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb8,0x7c] +0x6a,0x04,0xb8,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x06,0x7c] +0x80,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x06,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_le_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x06,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x06,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_le_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x06,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_le_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x06,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x06,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_le_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x06,0x7c] +0xc1,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x06,0x7c] +0x7f,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x06,0x7c] +0x7e,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x06,0x7c] +0x7d,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x06,0x7c] +0x65,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x06,0x7c] +0x01,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x06,0x7c] +0x01,0x05,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x06,0x7c] +0x6b,0x04,0x06,0x7c + +# W32: v_cmp_le_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x06,0x7c] +0x6a,0x04,0x06,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x26,0x7c] +0xf0,0x04,0x26,0x7c + +# W32: 
v_cmp_le_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x26,0x7c] +0x80,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x26,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x26,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x26,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x26,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x26,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x26,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x26,0x7c] +0xc1,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x26,0x7c] +0xf7,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x26,0x7c] +0x7f,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x26,0x7c] +0x7e,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x26,0x7c] +0x7d,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x26,0x7c] +0x65,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x26,0x7c] +0x01,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x27,0x7c] +# W64: v_cmp_le_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x27,0x7c] +0x01,0xff,0x27,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x26,0x7c] +0x01,0x05,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x26,0x7c] +0xff,0x05,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x26,0x7c] +0x6b,0x04,0x26,0x7c + +# W32: v_cmp_le_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x26,0x7c] +0x6a,0x04,0x26,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x46,0x7c] +0xf0,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x46,0x7c] +0x80,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x46,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x46,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x46,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x46,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x46,0x7c,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x46,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x46,0x7c] +0xc1,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x46,0x7c] +0xf7,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x46,0x7c] +0x7e,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x46,0x7c] +0x64,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x46,0x7c] +0x02,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x46,0x7c] +0x04,0x04,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x46,0x7c] +0x01,0x05,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x47,0x7c] +# W64: v_cmp_le_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x47,0x7c] +0x01,0xfd,0x47,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x46,0x7c] +0xfe,0x05,0x46,0x7c + +# W32: v_cmp_le_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x46,0x7c] +0x6a,0x04,0x46,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x66,0x7c] +0x80,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x66,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_le_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x66,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x66,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_le_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x66,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_le_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x66,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x66,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_le_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x66,0x7c] +0xc1,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x66,0x7c] +0x7f,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x66,0x7c] +0x7e,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x66,0x7c] +0x7d,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x66,0x7c] +0x65,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x66,0x7c] +0x01,0x04,0x66,0x7c + 
+# W32: v_cmp_le_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x66,0x7c] +0x01,0x05,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x66,0x7c] +0x6b,0x04,0x66,0x7c + +# W32: v_cmp_le_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x66,0x7c] +0x6a,0x04,0x66,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x86,0x7c] +0xf0,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x86,0x7c] +0x80,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x86,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x86,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x86,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x86,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x86,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x86,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x86,0x7c] +0xc1,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x86,0x7c] +0xf7,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x86,0x7c] +0x7f,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x86,0x7c] +0x7e,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x86,0x7c] +0x7d,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x86,0x7c] +0x65,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x86,0x7c] +0x01,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x87,0x7c] +# W64: v_cmp_le_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x87,0x7c] +0x01,0xff,0x87,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x86,0x7c] +0x01,0x05,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x86,0x7c] +0xff,0x05,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x86,0x7c] +0x6b,0x04,0x86,0x7c + +# W32: v_cmp_le_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x86,0x7c] +0x6a,0x04,0x86,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, 0.5, v[2:3] ; encoding: 
[0xf0,0x04,0xa6,0x7c] +0xf0,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xa6,0x7c] +0x80,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa6,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa6,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xa6,0x7c] +0xc1,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa6,0x7c] +0xf7,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xa6,0x7c] +0x7e,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa6,0x7c] +0x64,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa6,0x7c] +0x02,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa6,0x7c] +0x04,0x04,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa6,0x7c] +0x01,0x05,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa7,0x7c] +# W64: v_cmp_le_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa7,0x7c] +0x01,0xfd,0xa7,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa6,0x7c] +0xfe,0x05,0xa6,0x7c + +# W32: v_cmp_le_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa6,0x7c] +0x6a,0x04,0xa6,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x76,0x7c] +0x80,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x76,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_le_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x76,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x76,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_le_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x76,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_le_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x76,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x76,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_le_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x76,0x7c] +0xc1,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x76,0x7c] +0x7f,0x04,0x76,0x7c + +# 
W32: v_cmp_le_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x76,0x7c] +0x7e,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x76,0x7c] +0x7d,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x76,0x7c] +0x65,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x76,0x7c] +0x01,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x76,0x7c] +0x01,0x05,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x76,0x7c] +0x6b,0x04,0x76,0x7c + +# W32: v_cmp_le_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x76,0x7c] +0x6a,0x04,0x76,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x96,0x7c] +0xf0,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x96,0x7c] +0x80,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x96,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x96,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x96,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x96,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x96,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x96,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x96,0x7c] +0xc1,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x96,0x7c] +0xf7,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x96,0x7c] +0x7f,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x96,0x7c] +0x7e,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x96,0x7c] +0x7d,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x96,0x7c] +0x65,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x96,0x7c] +0x01,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x97,0x7c] +# W64: v_cmp_le_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x97,0x7c] +0x01,0xff,0x97,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x96,0x7c] 
+0x01,0x05,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x96,0x7c] +0xff,0x05,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x96,0x7c] +0x6b,0x04,0x96,0x7c + +# W32: v_cmp_le_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x96,0x7c] +0x6a,0x04,0x96,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb6,0x7c] +0xf0,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xb6,0x7c] +0x80,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb6,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_le_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb6,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb6,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_le_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb6,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_le_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb6,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb6,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_le_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xb6,0x7c] +0xc1,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb6,0x7c] +0xf7,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xb6,0x7c] +0x7e,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb6,0x7c] +0x64,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb6,0x7c] +0x02,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb6,0x7c] +0x04,0x04,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb6,0x7c] +0x01,0x05,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb7,0x7c] +# W64: v_cmp_le_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb7,0x7c] +0x01,0xfd,0xb7,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb6,0x7c] +0xfe,0x05,0xb6,0x7c + +# W32: v_cmp_le_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb6,0x7c] +0x6a,0x04,0xb6,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x0a,0x7c] +0x80,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x0a,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_lg_f16_e32 vcc, 0x3456, v2 ; 
encoding: [0xff,0x04,0x0a,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x0a,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_lg_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_lg_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0a,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_lg_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x7c] +0xc1,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x7c] +0x7f,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x7c] +0x7e,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x7c] +0x7d,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x0a,0x7c] +0x65,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x0a,0x7c] +0x01,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0a,0x7c] +0x01,0x05,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x7c] +0x6b,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x7c] +0x6a,0x04,0x0a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x2a,0x7c] +0xf0,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x2a,0x7c] +0x80,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lg_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lg_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x2a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lg_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x2a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lg_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x2a,0x7c] +0xc1,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x2a,0x7c] +0xf7,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x2a,0x7c] +0x7f,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x2a,0x7c] +0x7e,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x2a,0x7c] 
+0x7d,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x2a,0x7c] +0x65,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x2a,0x7c] +0x01,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x2b,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x2b,0x7c] +0x01,0xff,0x2b,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2a,0x7c] +0x01,0x05,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x2a,0x7c] +0xff,0x05,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2a,0x7c] +0x6b,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2a,0x7c] +0x6a,0x04,0x2a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4a,0x7c] +0xf0,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x4a,0x7c] +0x80,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lg_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lg_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lg_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lg_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x4a,0x7c] +0xc1,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4a,0x7c] +0xf7,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x4a,0x7c] +0x7e,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4a,0x7c] +0x64,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4a,0x7c] +0x02,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4a,0x7c] +0x04,0x04,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4a,0x7c] +0x01,0x05,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4b,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4b,0x7c] +0x01,0xfd,0x4b,0x7c + 
+# W32: v_cmp_lg_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4a,0x7c] +0xfe,0x05,0x4a,0x7c + +# W32: v_cmp_lg_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4a,0x7c] +0x6a,0x04,0x4a,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x02,0x7c] +0x80,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x02,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_lt_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x02,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x02,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_lt_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x02,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_lt_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x02,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x02,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_lt_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x02,0x7c] +0xc1,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7c] +0x7f,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7c] +0x7e,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x02,0x7c] +0x7d,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x02,0x7c] +0x65,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x02,0x7c] +0x01,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x02,0x7c] +0x01,0x05,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7c] +0x6b,0x04,0x02,0x7c + +# W32: v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7c] +0x6a,0x04,0x02,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x22,0x7c] +0xf0,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x22,0x7c] +0x80,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x22,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x22,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x22,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x22,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x22,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x22,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x22,0x7c] +0xc1,0x04,0x22,0x7c + +# 
W32: v_cmp_lt_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x22,0x7c] +0xf7,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x22,0x7c] +0x7f,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x22,0x7c] +0x7e,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x22,0x7c] +0x7d,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x22,0x7c] +0x65,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x22,0x7c] +0x01,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x23,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x23,0x7c] +0x01,0xff,0x23,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] +0x01,0x05,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x22,0x7c] +0xff,0x05,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x22,0x7c] +0x6b,0x04,0x22,0x7c + +# W32: v_cmp_lt_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x22,0x7c] +0x6a,0x04,0x22,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x42,0x7c] +0xf0,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x42,0x7c] +0x80,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x42,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x42,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x42,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x42,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x42,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x42,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x42,0x7c] +0xc1,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x42,0x7c] +0xf7,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x42,0x7c] +0x7e,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x42,0x7c] +0x64,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: 
[0x02,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x42,0x7c] +0x02,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x42,0x7c] +0x04,0x04,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x42,0x7c] +0x01,0x05,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x43,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x43,0x7c] +0x01,0xfd,0x43,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x42,0x7c] +0xfe,0x05,0x42,0x7c + +# W32: v_cmp_lt_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x42,0x7c] +0x6a,0x04,0x42,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x62,0x7c] +0x80,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x62,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_lt_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x62,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x62,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_lt_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x62,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_lt_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x62,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x62,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_lt_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x62,0x7c] +0xc1,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x62,0x7c] +0x7f,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x62,0x7c] +0x7e,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x62,0x7c] +0x7d,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x62,0x7c] +0x65,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x62,0x7c] +0x01,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x62,0x7c] +0x01,0x05,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x62,0x7c] +0x6b,0x04,0x62,0x7c + +# W32: v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x62,0x7c] +0x6a,0x04,0x62,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x82,0x7c] +0xf0,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, 0, v2 ; encoding: 
[0x80,0x04,0x82,0x7c] +0x80,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x82,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x82,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x82,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x82,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x82,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x82,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x82,0x7c] +0xc1,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x82,0x7c] +0xf7,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x82,0x7c] +0x7f,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x82,0x7c] +0x7e,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x82,0x7c] +0x7d,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x82,0x7c] +0x65,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x82,0x7c] +0x01,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x83,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x83,0x7c] +0x01,0xff,0x83,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x82,0x7c] +0x01,0x05,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x82,0x7c] +0xff,0x05,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x82,0x7c] +0x6b,0x04,0x82,0x7c + +# W32: v_cmp_lt_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x82,0x7c] +0x6a,0x04,0x82,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa2,0x7c] +0xf0,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xa2,0x7c] +0x80,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa2,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa2,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xa2,0x7c] +# W64: 
v_cmp_lt_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xa2,0x7c] +0xc1,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa2,0x7c] +0xf7,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xa2,0x7c] +0x7e,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa2,0x7c] +0x64,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa2,0x7c] +0x02,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa2,0x7c] +0x04,0x04,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa2,0x7c] +0x01,0x05,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa3,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa3,0x7c] +0x01,0xfd,0xa3,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa2,0x7c] +0xfe,0x05,0xa2,0x7c + +# W32: v_cmp_lt_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xa2,0x7c] +0x6a,0x04,0xa2,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x72,0x7c] +0x80,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x72,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_lt_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x72,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x72,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_lt_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x72,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_lt_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x72,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x72,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_lt_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x72,0x7c] +0xc1,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x72,0x7c] +0x7f,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x72,0x7c] +0x7e,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x72,0x7c] +0x7d,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x72,0x7c] +0x65,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x72,0x7c] +0x01,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, v1, v2 ; encoding: 
[0x01,0x05,0x72,0x7c] +0x01,0x05,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x72,0x7c] +0x6b,0x04,0x72,0x7c + +# W32: v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x72,0x7c] +0x6a,0x04,0x72,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x92,0x7c] +0xf0,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x92,0x7c] +0x80,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x92,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x92,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x92,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x92,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x92,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x92,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x92,0x7c] +0xc1,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x92,0x7c] +0xf7,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x92,0x7c] +0x7f,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x92,0x7c] +0x7e,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x92,0x7c] +0x7d,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x92,0x7c] +0x65,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x92,0x7c] +0x01,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x93,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x93,0x7c] +0x01,0xff,0x93,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x92,0x7c] +0x01,0x05,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x92,0x7c] +0xff,0x05,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x92,0x7c] +0x6b,0x04,0x92,0x7c + +# W32: v_cmp_lt_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x92,0x7c] +0x6a,0x04,0x92,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb2,0x7c] +0xf0,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xb2,0x7c] +# W64: 
v_cmp_lt_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xb2,0x7c] +0x80,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_lt_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb2,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_lt_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_lt_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb2,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_lt_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xb2,0x7c] +0xc1,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb2,0x7c] +0xf7,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xb2,0x7c] +0x7e,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb2,0x7c] +0x64,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb2,0x7c] +0x02,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb2,0x7c] +0x04,0x04,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb2,0x7c] +0x01,0x05,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb3,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb3,0x7c] +0x01,0xfd,0xb3,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb2,0x7c] +0xfe,0x05,0xb2,0x7c + +# W32: v_cmp_lt_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xb2,0x7c] +0x6a,0x04,0xb2,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x6a,0x7c] +0x80,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x6a,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ne_i16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x6a,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x6a,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ne_i16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ne_i16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x6a,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ne_i16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x6a,0x7c] +0xc1,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x6a,0x7c] +0x7f,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, exec_lo, v2 ; encoding: 
[0x7e,0x04,0x6a,0x7c] +0x7e,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x6a,0x7c] +0x7d,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x6a,0x7c] +0x65,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x6a,0x7c] +0x01,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x6a,0x7c] +0x01,0x05,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x6a,0x7c] +0x6b,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x6a,0x7c] +# W64: v_cmp_ne_i16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x6a,0x7c] +0x6a,0x04,0x6a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x8a,0x7c] +0xf0,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x8a,0x7c] +0x80,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ne_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ne_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ne_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ne_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x8a,0x7c] +0xc1,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x8a,0x7c] +0xf7,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x8a,0x7c] +0x7f,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x8a,0x7c] +0x7e,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x8a,0x7c] +0x7d,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x8a,0x7c] +0x65,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x8a,0x7c] +0x01,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x8b,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x8b,0x7c] +0x01,0xff,0x8b,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x8a,0x7c] +0x01,0x05,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, v255, 
v2 ; encoding: [0xff,0x05,0x8a,0x7c] +0xff,0x05,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8a,0x7c] +0x6b,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8a,0x7c] +0x6a,0x04,0x8a,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xaa,0x7c] +0xf0,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xaa,0x7c] +0x80,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ne_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xaa,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ne_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ne_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xaa,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ne_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xaa,0x7c] +0xc1,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xaa,0x7c] +0xf7,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xaa,0x7c] +0x7e,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xaa,0x7c] +0x64,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xaa,0x7c] +0x02,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xaa,0x7c] +0x04,0x04,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xaa,0x7c] +0x01,0x05,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xab,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xab,0x7c] +0x01,0xfd,0xab,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xaa,0x7c] +0xfe,0x05,0xaa,0x7c + +# W32: v_cmp_ne_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xaa,0x7c] +0x6a,0x04,0xaa,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x1a,0x7c] +0x80,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x1a,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_neq_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x1a,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x1a,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_neq_f16_e32 vcc_lo, 
0xfe0b, v2 ; encoding: [0xff,0x04,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_neq_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1a,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_neq_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x1a,0x7c] +0xc1,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x1a,0x7c] +0x7f,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x1a,0x7c] +0x7e,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x1a,0x7c] +0x7d,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x1a,0x7c] +0x65,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x1a,0x7c] +0x01,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1a,0x7c] +0x01,0x05,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1a,0x7c] +0x6b,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1a,0x7c] +0x6a,0x04,0x1a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x3a,0x7c] +0xf0,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x3a,0x7c] +0x80,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_neq_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_neq_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_neq_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x3a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_neq_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x3a,0x7c] +0xc1,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x3a,0x7c] +0xf7,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x3a,0x7c] +0x7f,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x3a,0x7c] +0x7e,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x3a,0x7c] +0x7d,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, s101, v2 ; encoding: 
[0x65,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x3a,0x7c] +0x65,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x3a,0x7c] +0x01,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x3b,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x3b,0x7c] +0x01,0xff,0x3b,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3a,0x7c] +0x01,0x05,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x3a,0x7c] +0xff,0x05,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3a,0x7c] +0x6b,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3a,0x7c] +0x6a,0x04,0x3a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5a,0x7c] +0xf0,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x5a,0x7c] +0x80,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_neq_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_neq_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_neq_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_neq_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x5a,0x7c] +0xc1,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5a,0x7c] +0xf7,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x5a,0x7c] +0x7e,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5a,0x7c] +0x64,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5a,0x7c] +0x02,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5a,0x7c] +0x04,0x04,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5a,0x7c] +0x01,0x05,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5b,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5b,0x7c] +0x01,0xfd,0x5b,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, 
v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5a,0x7c] +0xfe,0x05,0x5a,0x7c + +# W32: v_cmp_neq_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5a,0x7c] +0x6a,0x04,0x5a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x7a,0x7c] +0x80,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x7a,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ne_u16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x7a,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x7a,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ne_u16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ne_u16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x7a,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ne_u16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x7a,0x7c] +0xc1,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x7a,0x7c] +0x7f,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x7a,0x7c] +0x7e,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x7a,0x7c] +0x7d,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x7a,0x7c] +0x65,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x7a,0x7c] +0x01,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x7a,0x7c] +0x01,0x05,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x7a,0x7c] +0x6b,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x7a,0x7c] +0x6a,0x04,0x7a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x9a,0x7c] +0xf0,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x9a,0x7c] +0x80,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9a,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ne_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9a,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9a,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ne_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9a,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ne_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9a,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9a,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ne_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x9a,0x7c] +0xc1,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, 
-4.0, v2 ; encoding: [0xf7,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x9a,0x7c] +0xf7,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x9a,0x7c] +0x7f,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x9a,0x7c] +0x7e,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x9a,0x7c] +0x7d,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x9a,0x7c] +0x65,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x9a,0x7c] +0x01,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x9b,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x9b,0x7c] +0x01,0xff,0x9b,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9a,0x7c] +0x01,0x05,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x9a,0x7c] +0xff,0x05,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9a,0x7c] +0x6b,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9a,0x7c] +0x6a,0x04,0x9a,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xba,0x7c] +0xf0,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xba,0x7c] +0x80,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xba,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ne_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xba,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xba,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ne_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xba,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ne_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xba,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xba,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ne_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xba,0x7c] +0xc1,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xba,0x7c] +0xf7,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xba,0x7c] +0x7e,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xba,0x7c] +0x64,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xba,0x7c] +# W64: 
v_cmp_ne_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xba,0x7c] +0x02,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xba,0x7c] +0x04,0x04,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xba,0x7c] +0x01,0x05,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbb,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbb,0x7c] +0x01,0xfd,0xbb,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xba,0x7c] +0xfe,0x05,0xba,0x7c + +# W32: v_cmp_ne_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xba,0x7c] +0x6a,0x04,0xba,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x12,0x7c] +0x80,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x12,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_nge_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x12,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x12,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_nge_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x12,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_nge_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x12,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x12,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_nge_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x12,0x7c] +0xc1,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x12,0x7c] +0x7f,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x12,0x7c] +0x7e,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x12,0x7c] +0x7d,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x12,0x7c] +0x65,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x12,0x7c] +0x01,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x12,0x7c] +0x01,0x05,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x12,0x7c] +0x6b,0x04,0x12,0x7c + +# W32: v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x12,0x7c] +0x6a,0x04,0x12,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x32,0x7c] +0xf0,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x32,0x7c] 
+0x80,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x32,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nge_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x32,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x32,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nge_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x32,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nge_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x32,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x32,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nge_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x32,0x7c] +0xc1,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x32,0x7c] +0xf7,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x32,0x7c] +0x7f,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x32,0x7c] +0x7e,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x32,0x7c] +0x7d,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x32,0x7c] +0x65,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x32,0x7c] +0x01,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x33,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x33,0x7c] +0x01,0xff,0x33,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x32,0x7c] +0x01,0x05,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x32,0x7c] +0xff,0x05,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x32,0x7c] +0x6b,0x04,0x32,0x7c + +# W32: v_cmp_nge_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x32,0x7c] +0x6a,0x04,0x32,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x52,0x7c] +0xf0,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x52,0x7c] +0x80,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x52,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nge_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x52,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x52,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nge_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x52,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nge_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x52,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x52,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nge_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x52,0x7c] +# 
W64: v_cmp_nge_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x52,0x7c] +0xc1,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x52,0x7c] +0xf7,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x52,0x7c] +0x7e,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x52,0x7c] +0x64,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x52,0x7c] +0x02,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x52,0x7c] +0x04,0x04,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x52,0x7c] +0x01,0x05,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x53,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x53,0x7c] +0x01,0xfd,0x53,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x52,0x7c] +0xfe,0x05,0x52,0x7c + +# W32: v_cmp_nge_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x52,0x7c] +0x6a,0x04,0x52,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x16,0x7c] +0x80,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x16,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_ngt_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x16,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x16,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_ngt_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x16,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_ngt_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x16,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x16,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_ngt_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x16,0x7c] +0xc1,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x16,0x7c] +0x7f,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x16,0x7c] +0x7e,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x16,0x7c] +0x7d,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x16,0x7c] +0x65,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x16,0x7c] +0x01,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x16,0x7c] +# W64: 
v_cmp_ngt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x16,0x7c] +0x01,0x05,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x16,0x7c] +0x6b,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x16,0x7c] +0x6a,0x04,0x16,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x36,0x7c] +0xf0,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x36,0x7c] +0x80,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x36,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ngt_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x36,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x36,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ngt_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x36,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ngt_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x36,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x36,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ngt_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x36,0x7c] +0xc1,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x36,0x7c] +0xf7,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x36,0x7c] +0x7f,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x36,0x7c] +0x7e,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x36,0x7c] +0x7d,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x36,0x7c] +0x65,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x36,0x7c] +0x01,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x37,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x37,0x7c] +0x01,0xff,0x37,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x36,0x7c] +0x01,0x05,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x36,0x7c] +0xff,0x05,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x36,0x7c] +0x6b,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x36,0x7c] +0x6a,0x04,0x36,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x56,0x7c] +0xf0,0x04,0x56,0x7c + +# W32: 
v_cmp_ngt_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x56,0x7c] +0x80,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x56,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_ngt_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x56,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x56,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_ngt_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x56,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_ngt_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x56,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x56,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_ngt_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x56,0x7c] +0xc1,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x56,0x7c] +0xf7,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x56,0x7c] +0x7e,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x56,0x7c] +0x64,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x56,0x7c] +0x02,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x56,0x7c] +0x04,0x04,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x56,0x7c] +0x01,0x05,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x57,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x57,0x7c] +0x01,0xfd,0x57,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x56,0x7c] +0xfe,0x05,0x56,0x7c + +# W32: v_cmp_ngt_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x56,0x7c] +0x6a,0x04,0x56,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x18,0x7c] +0x80,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x18,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_nle_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x18,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x18,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_nle_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x18,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_nle_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x18,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x18,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_nle_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x18,0x7c] +0xc1,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x18,0x7c] +0x7f,0x04,0x18,0x7c + +# W32: 
v_cmp_nle_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x18,0x7c] +0x7e,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x18,0x7c] +0x7d,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x18,0x7c] +0x65,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x18,0x7c] +0x01,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x18,0x7c] +0x01,0x05,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x18,0x7c] +0x6b,0x04,0x18,0x7c + +# W32: v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x18,0x7c] +0x6a,0x04,0x18,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x38,0x7c] +0xf0,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x38,0x7c] +0x80,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x38,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nle_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x38,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x38,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nle_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x38,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nle_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x38,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x38,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nle_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x38,0x7c] +0xc1,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x38,0x7c] +0xf7,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x38,0x7c] +0x7f,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x38,0x7c] +0x7e,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x38,0x7c] +0x7d,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x38,0x7c] +0x65,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x38,0x7c] +0x01,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x39,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x39,0x7c] +0x01,0xff,0x39,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, v1, v2 ; 
encoding: [0x01,0x05,0x38,0x7c] +0x01,0x05,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x38,0x7c] +0xff,0x05,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x38,0x7c] +0x6b,0x04,0x38,0x7c + +# W32: v_cmp_nle_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x38,0x7c] +0x6a,0x04,0x38,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x58,0x7c] +0xf0,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x58,0x7c] +0x80,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x58,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nle_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x58,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x58,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nle_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x58,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nle_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x58,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x58,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nle_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x58,0x7c] +0xc1,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x58,0x7c] +0xf7,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x58,0x7c] +0x7e,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x58,0x7c] +0x64,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x58,0x7c] +0x02,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x58,0x7c] +0x04,0x04,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x58,0x7c] +0x01,0x05,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x59,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x59,0x7c] +0x01,0xfd,0x59,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x58,0x7c] +0xfe,0x05,0x58,0x7c + +# W32: v_cmp_nle_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x58,0x7c] +0x6a,0x04,0x58,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x14,0x7c] +0x80,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, 0x3456, v2 ; encoding: 
[0xff,0x04,0x14,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_nlg_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x14,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x14,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_nlg_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x14,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_nlg_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x14,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x14,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_nlg_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x14,0x7c] +0xc1,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x14,0x7c] +0x7f,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x14,0x7c] +0x7e,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x14,0x7c] +0x7d,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x14,0x7c] +0x65,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x14,0x7c] +0x01,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x14,0x7c] +0x01,0x05,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x14,0x7c] +0x6b,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x14,0x7c] +0x6a,0x04,0x14,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x34,0x7c] +0xf0,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x34,0x7c] +0x80,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x34,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nlg_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x34,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x34,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nlg_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x34,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nlg_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x34,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x34,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nlg_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x34,0x7c] +0xc1,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x34,0x7c] +0xf7,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x34,0x7c] +0x7f,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x34,0x7c] +0x7e,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 
vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x34,0x7c] +0x7d,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x34,0x7c] +0x65,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x34,0x7c] +0x01,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x35,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x35,0x7c] +0x01,0xff,0x35,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x34,0x7c] +0x01,0x05,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x34,0x7c] +0xff,0x05,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x34,0x7c] +0x6b,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x34,0x7c] +0x6a,0x04,0x34,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x54,0x7c] +0xf0,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x54,0x7c] +0x80,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x54,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nlg_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x54,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x54,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nlg_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x54,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nlg_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x54,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x54,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nlg_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x54,0x7c] +0xc1,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x54,0x7c] +0xf7,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x54,0x7c] +0x7e,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x54,0x7c] +0x64,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x54,0x7c] +0x02,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x54,0x7c] +0x04,0x04,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x54,0x7c] +0x01,0x05,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, 
v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x55,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x55,0x7c] +0x01,0xfd,0x55,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x54,0x7c] +0xfe,0x05,0x54,0x7c + +# W32: v_cmp_nlg_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x54,0x7c] +0x6a,0x04,0x54,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x1c,0x7c] +0x80,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x1c,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_nlt_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x1c,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x1c,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_nlt_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_nlt_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1c,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_nlt_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x1c,0x7c] +0xc1,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x1c,0x7c] +0x7f,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x1c,0x7c] +0x7e,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x1c,0x7c] +0x7d,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x1c,0x7c] +0x65,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x1c,0x7c] +0x01,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1c,0x7c] +0x01,0x05,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1c,0x7c] +0x6b,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1c,0x7c] +0x6a,0x04,0x1c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x3c,0x7c] +0xf0,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x3c,0x7c] +0x80,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nlt_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nlt_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nlt_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3c,0x7c,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x3c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nlt_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x3c,0x7c] +0xc1,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x3c,0x7c] +0xf7,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x3c,0x7c] +0x7f,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x3c,0x7c] +0x7e,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x3c,0x7c] +0x7d,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x3c,0x7c] +0x65,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x3c,0x7c] +0x01,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x3d,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x3d,0x7c] +0x01,0xff,0x3d,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3c,0x7c] +0x01,0x05,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x3c,0x7c] +0xff,0x05,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3c,0x7c] +0x6b,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3c,0x7c] +0x6a,0x04,0x3c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5c,0x7c] +0xf0,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x5c,0x7c] +0x80,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_nlt_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5c,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_nlt_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_nlt_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5c,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_nlt_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x5c,0x7c] +0xc1,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5c,0x7c] +0xf7,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x5c,0x7c] +0x7e,0x04,0x5c,0x7c + +# W32: 
v_cmp_nlt_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5c,0x7c] +0x64,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5c,0x7c] +0x02,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5c,0x7c] +0x04,0x04,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5c,0x7c] +0x01,0x05,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5d,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5d,0x7c] +0x01,0xfd,0x5d,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5c,0x7c] +0xfe,0x05,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5c,0x7c] +0x6a,0x04,0x5c,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x0e,0x7c] +0x80,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x0e,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_o_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x0e,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x0e,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_o_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_o_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0e,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_o_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x0e,0x7c] +0xc1,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x0e,0x7c] +0x7f,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x0e,0x7c] +0x7e,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x0e,0x7c] +0x7d,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x0e,0x7c] +0x65,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x0e,0x7c] +0x01,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0e,0x7c] +0x01,0x05,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0e,0x7c] +0x6b,0x04,0x0e,0x7c + +# W32: v_cmp_o_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0e,0x7c] +0x6a,0x04,0x0e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x2e,0x7c] +# 
W64: v_cmp_o_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x2e,0x7c] +0xf0,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x2e,0x7c] +0x80,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_o_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x2e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_o_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x2e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_o_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x2e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_o_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x2e,0x7c] +0xc1,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x2e,0x7c] +0xf7,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x2e,0x7c] +0x7f,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x2e,0x7c] +0x7e,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x2e,0x7c] +0x7d,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x2e,0x7c] +0x65,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x2e,0x7c] +0x01,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x2f,0x7c] +# W64: v_cmp_o_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x2f,0x7c] +0x01,0xff,0x2f,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2e,0x7c] +0x01,0x05,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x2e,0x7c] +0xff,0x05,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x2e,0x7c] +0x6b,0x04,0x2e,0x7c + +# W32: v_cmp_o_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x2e,0x7c] +0x6a,0x04,0x2e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4e,0x7c] +0xf0,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x4e,0x7c] +0x80,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_o_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_o_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_o_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: 
[0xff,0x04,0x4e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_o_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x4e,0x7c] +0xc1,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4e,0x7c] +0xf7,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x4e,0x7c] +0x7e,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4e,0x7c] +0x64,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4e,0x7c] +0x02,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4e,0x7c] +0x04,0x04,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4e,0x7c] +0x01,0x05,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4f,0x7c] +# W64: v_cmp_o_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4f,0x7c] +0x01,0xfd,0x4f,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4e,0x7c] +0xfe,0x05,0x4e,0x7c + +# W32: v_cmp_o_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x4e,0x7c] +0x6a,0x04,0x4e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x1e,0x7c] +0x80,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x1e,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_t_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x1e,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x1e,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_t_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_t_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1e,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_t_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x1e,0x7c] +0xc1,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x1e,0x7c] +0x7f,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x1e,0x7c] +0x7e,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x1e,0x7c] +0x7d,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x1e,0x7c] +0x65,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x1e,0x7c] 
+0x01,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1e,0x7c] +0x01,0x05,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x1e,0x7c] +0x6b,0x04,0x1e,0x7c + +# W32: v_cmp_t_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x1e,0x7c] +0x6a,0x04,0x1e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x3e,0x7c] +0xf0,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x3e,0x7c] +0x80,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x3e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x3e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x3e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x3e,0x7c] +0xc1,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x3e,0x7c] +0xf7,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x3e,0x7c] +0x7f,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x3e,0x7c] +0x7e,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x3e,0x7c] +0x7d,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x3e,0x7c] +0x65,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x3e,0x7c] +0x01,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x3f,0x7c] +# W64: v_cmp_t_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x3f,0x7c] +0x01,0xff,0x3f,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3e,0x7c] +0x01,0x05,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x3e,0x7c] +0xff,0x05,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x3e,0x7c] +0x6b,0x04,0x3e,0x7c + +# W32: v_cmp_t_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x3e,0x7c] +0x6a,0x04,0x3e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5e,0x7c] 
+0xf0,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x5e,0x7c] +0x80,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x5e,0x7c] +0xc1,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5e,0x7c] +0xf7,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x5e,0x7c] +0x7e,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5e,0x7c] +0x64,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5e,0x7c] +0x02,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5e,0x7c] +0x04,0x04,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5e,0x7c] +0x01,0x05,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5f,0x7c] +# W64: v_cmp_t_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5f,0x7c] +0x01,0xfd,0x5f,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5e,0x7c] +0xfe,0x05,0x5e,0x7c + +# W32: v_cmp_t_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x5e,0x7c] +0x6a,0x04,0x5e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x8e,0x7c] +0xf0,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x8e,0x7c] +0x80,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_i32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x8e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_i32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_i32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x8e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_i32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x8e,0x7c] +0xc1,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, -4.0, v2 ; encoding: 
[0xf7,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x8e,0x7c] +0xf7,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x8e,0x7c] +0x7f,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x8e,0x7c] +0x7e,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x8e,0x7c] +0x7d,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x8e,0x7c] +0x65,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x8e,0x7c] +0x01,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x8f,0x7c] +# W64: v_cmp_t_i32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x8f,0x7c] +0x01,0xff,0x8f,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x8e,0x7c] +0x01,0x05,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x8e,0x7c] +0xff,0x05,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x8e,0x7c] +0x6b,0x04,0x8e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x8e,0x7c] +0x6a,0x04,0x8e,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xae,0x7c] +0xf0,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xae,0x7c] +0x80,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xae,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_i64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xae,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xae,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_i64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xae,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_i64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xae,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xae,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_i64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xae,0x7c] +0xc1,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xae,0x7c] +0xf7,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xae,0x7c] +0x7e,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xae,0x7c] +0x64,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xae,0x7c] 
+0x02,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xae,0x7c] +0x04,0x04,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xae,0x7c] +0x01,0x05,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xaf,0x7c] +# W64: v_cmp_t_i64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xaf,0x7c] +0x01,0xfd,0xaf,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xae,0x7c] +0xfe,0x05,0xae,0x7c + +# W32: v_cmp_t_i64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xae,0x7c] +0x6a,0x04,0xae,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x9e,0x7c] +0xf0,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x9e,0x7c] +0x80,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9e,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_u32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x9e,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9e,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_u32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9e,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_u32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x9e,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9e,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_u32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x9e,0x7c] +0xc1,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x9e,0x7c] +0xf7,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x9e,0x7c] +0x7f,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x9e,0x7c] +0x7e,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x9e,0x7c] +0x7d,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x9e,0x7c] +0x65,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x9e,0x7c] +0x01,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x9f,0x7c] +# W64: v_cmp_t_u32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x9f,0x7c] +0x01,0xff,0x9f,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9e,0x7c] +0x01,0x05,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x9e,0x7c] +0xff,0x05,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9e,0x7c] +# W64: 
v_cmp_t_u32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x9e,0x7c] +0x6b,0x04,0x9e,0x7c + +# W32: v_cmp_t_u32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x9e,0x7c] +0x6a,0x04,0x9e,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbe,0x7c] +0xf0,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0xbe,0x7c] +0x80,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_t_u64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0xbe,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_t_u64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_t_u64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0xbe,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_t_u64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0xbe,0x7c] +0xc1,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbe,0x7c] +0xf7,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0xbe,0x7c] +0x7e,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbe,0x7c] +0x64,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbe,0x7c] +0x02,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbe,0x7c] +0x04,0x04,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbe,0x7c] +0x01,0x05,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbf,0x7c] +# W64: v_cmp_t_u64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbf,0x7c] +0x01,0xfd,0xbf,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbe,0x7c] +0xfe,0x05,0xbe,0x7c + +# W32: v_cmp_t_u64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0xbe,0x7c] +0x6a,0x04,0xbe,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x10,0x7c] +0x80,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, 0x3456, v2 ; encoding: [0xff,0x04,0x10,0x7c,0x56,0x34,0x00,0x00] +# W64: v_cmp_u_f16_e32 vcc, 0x3456, v2 ; encoding: [0xff,0x04,0x10,0x7c,0x56,0x34,0x00,0x00] +0xff,0x04,0x10,0x7c,0x56,0x34,0x00,0x00 + +# W32: v_cmp_u_f16_e32 vcc_lo, 0xfe0b, v2 ; encoding: [0xff,0x04,0x10,0x7c,0x0b,0xfe,0x00,0x00] +# W64: v_cmp_u_f16_e32 vcc, 0xfe0b, v2 ; encoding: [0xff,0x04,0x10,0x7c,0x0b,0xfe,0x00,0x00] 
+0xff,0x04,0x10,0x7c,0x0b,0xfe,0x00,0x00 + +# W32: v_cmp_u_f16_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7c] +0xc1,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x10,0x7c] +0x7f,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x10,0x7c] +0x7e,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x10,0x7c] +0x7d,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x10,0x7c] +0x65,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, s1, v2 ; encoding: [0x01,0x04,0x10,0x7c] +0x01,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x10,0x7c] +0x01,0x05,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x10,0x7c] +0x6b,0x04,0x10,0x7c + +# W32: v_cmp_u_f16_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x10,0x7c] +0x6a,0x04,0x10,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, 0.5, v2 ; encoding: [0xf0,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, 0.5, v2 ; encoding: [0xf0,0x04,0x30,0x7c] +0xf0,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, 0, v2 ; encoding: [0x80,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, 0, v2 ; encoding: [0x80,0x04,0x30,0x7c] +0x80,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, 0x3f717273, v2 ; encoding: [0xff,0x04,0x30,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_u_f32_e32 vcc, 0x3f717273, v2 ; encoding: [0xff,0x04,0x30,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x30,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_u_f32_e32 vcc_lo, 0xaf123456, v2 ; encoding: [0xff,0x04,0x30,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_u_f32_e32 vcc, 0xaf123456, v2 ; encoding: [0xff,0x04,0x30,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x30,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_u_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x30,0x7c] +0xc1,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, -4.0, v2 ; encoding: [0xf7,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, -4.0, v2 ; encoding: [0xf7,0x04,0x30,0x7c] +0xf7,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, exec_hi, v2 ; encoding: [0x7f,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, exec_hi, v2 ; encoding: [0x7f,0x04,0x30,0x7c] +0x7f,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, exec_lo, v2 ; encoding: [0x7e,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, exec_lo, v2 ; encoding: [0x7e,0x04,0x30,0x7c] +0x7e,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, m0, v2 ; encoding: [0x7d,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, m0, v2 ; encoding: [0x7d,0x04,0x30,0x7c] +0x7d,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, s101, v2 ; encoding: [0x65,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, s101, v2 ; encoding: [0x65,0x04,0x30,0x7c] +0x65,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, s1, v2 ; encoding: [0x01,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, s1, v2 ; encoding: 
[0x01,0x04,0x30,0x7c] +0x01,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, v1, v255 ; encoding: [0x01,0xff,0x31,0x7c] +# W64: v_cmp_u_f32_e32 vcc, v1, v255 ; encoding: [0x01,0xff,0x31,0x7c] +0x01,0xff,0x31,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x30,0x7c] +0x01,0x05,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, v255, v2 ; encoding: [0xff,0x05,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, v255, v2 ; encoding: [0xff,0x05,0x30,0x7c] +0xff,0x05,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, vcc_hi, v2 ; encoding: [0x6b,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, vcc_hi, v2 ; encoding: [0x6b,0x04,0x30,0x7c] +0x6b,0x04,0x30,0x7c + +# W32: v_cmp_u_f32_e32 vcc_lo, vcc_lo, v2 ; encoding: [0x6a,0x04,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, vcc_lo, v2 ; encoding: [0x6a,0x04,0x30,0x7c] +0x6a,0x04,0x30,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, 0.5, v[2:3] ; encoding: [0xf0,0x04,0x50,0x7c] +0xf0,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, 0, v[2:3] ; encoding: [0x80,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, 0, v[2:3] ; encoding: [0x80,0x04,0x50,0x7c] +0x80,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x50,0x7c,0x73,0x72,0x71,0x3f] +# W64: v_cmp_u_f64_e32 vcc, 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x50,0x7c,0x73,0x72,0x71,0x3f] +0xff,0x04,0x50,0x7c,0x73,0x72,0x71,0x3f + +# W32: v_cmp_u_f64_e32 vcc_lo, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x50,0x7c,0x56,0x34,0x12,0xaf] +# W64: v_cmp_u_f64_e32 vcc, 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x50,0x7c,0x56,0x34,0x12,0xaf] +0xff,0x04,0x50,0x7c,0x56,0x34,0x12,0xaf + +# W32: v_cmp_u_f64_e32 vcc_lo, -1, v[2:3] ; encoding: [0xc1,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, -1, v[2:3] ; encoding: [0xc1,0x04,0x50,0x7c] +0xc1,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, -4.0, v[2:3] ; encoding: [0xf7,0x04,0x50,0x7c] +0xf7,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, exec, v[2:3] ; encoding: [0x7e,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, exec, v[2:3] ; encoding: [0x7e,0x04,0x50,0x7c] +0x7e,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, s[100:101], v[2:3] ; encoding: [0x64,0x04,0x50,0x7c] +0x64,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, s[2:3], v[2:3] ; encoding: [0x02,0x04,0x50,0x7c] +0x02,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, s[4:5], v[2:3] ; encoding: [0x04,0x04,0x50,0x7c] +0x04,0x04,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x50,0x7c] +0x01,0x05,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x51,0x7c] +# W64: v_cmp_u_f64_e32 vcc, v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x51,0x7c] +0x01,0xfd,0x51,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x50,0x7c] +0xfe,0x05,0x50,0x7c + +# W32: v_cmp_u_f64_e32 vcc_lo, vcc, v[2:3] ; encoding: [0x6a,0x04,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, vcc, v[2:3] ; encoding: [0x6a,0x04,0x50,0x7c] 
+0x6a,0x04,0x50,0x7c + +# W32: v_cmp_class_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0xfc,0x7c] +# W64: v_cmp_class_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0xfc,0x7c] +0x01,0x05,0xfc,0x7c + +# W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +# W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05 + +# W32: v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +# W64: v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05 + +# W32: v_cmp_class_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, 0 ; encoding: 
[0x0a,0x00,0x7e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_class_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_class_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x7e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x7e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_class_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0x04,0x02,0x00] +# 
W64: v_cmp_class_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x7e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_class_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_class_f64_e32 vcc_lo, v[1:2], v2 ; encoding: [0x01,0x05,0xfe,0x7c] +# W64: v_cmp_class_f64_e32 vcc, v[1:2], v2 ; encoding: [0x01,0x05,0xfe,0x7c] +0x01,0x05,0xfe,0x7c + +# W32: v_cmp_class_f64_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s100, v[1:2], v2 ; encoding: [0x64,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[100:101], v[1:2], v2 ; encoding: [0x64,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, exec, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], exec, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, s[100:101], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], s[100:101], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, s[2:3], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], s[2:3], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, s[4:5], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], s[4:5], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], -4.0 ; encoding: 
[0x0a,0x00,0x7f,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], exec_hi ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], exec_hi ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], exec_lo ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], exec_lo ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], m0 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], m0 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], s101 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], s101 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], s2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], s2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], v255 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], v255 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, -v[1:2], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_class_f64_e64 s[10:11], -v[1:2], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], vcc_hi ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], vcc_hi ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[1:2], vcc_lo ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[1:2], vcc_lo ; encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x7f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_class_f64_e64 s10, v[254:255], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], v[254:255], v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s10, vcc, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[10:11], vcc, v2 ; encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x7f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_class_f64_e64 s12, v[1:2], v2 ; encoding: [0x0c,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_class_f64_e64 s[12:13], v[1:2], v2 ; encoding: [0x0c,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x04,0x7c] +# W64: v_cmp_eq_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x04,0x7c] 
+0x01,0x05,0x04,0x7c + +# W32: v_cmp_eq_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x02,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x02,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x02,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, 
v255 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x02,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x02,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x02,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_eq_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_eq_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_eq_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_eq_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_eq_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x02,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x02,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x02,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x02,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x02,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x02,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x24,0x7c] +# W64: v_cmp_eq_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x24,0x7c] +0x01,0x05,0x24,0x7c + +# W32: v_cmp_eq_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x12,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], 0, v2 ; 
encoding: [0x0a,0x00,0x12,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x12,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x12,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, s101 ; encoding: 
[0x0a,0x00,0x12,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x12,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x12,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x12,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_eq_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_eq_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_eq_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_eq_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_eq_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x12,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x12,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x12,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x12,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x12,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x12,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x44,0x7c] +# W64: v_cmp_eq_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x44,0x7c] +0x01,0x05,0x44,0x7c + +# W32: v_cmp_eq_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xf0,0x04,0x02,0x00] 
+0x0a,0x00,0x22,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x22,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x22,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x22,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x09,0x00,0x00] +# W64: 
v_cmp_eq_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x22,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x22,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x22,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x22,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x22,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_eq_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_eq_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_eq_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x22,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x22,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x22,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x22,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x22,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x22,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x64,0x7c] +# W64: v_cmp_eq_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x64,0x7c] +0x01,0x05,0x64,0x7c + +# W32: v_cmp_eq_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x32,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, -1, v2 ; encoding: 
[0x0a,0x00,0x32,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x32,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x32,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xd7,0x00,0x00] +# W64: 
v_cmp_eq_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x32,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x32,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x32,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x32,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x32,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x32,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x84,0x7c] +# W64: v_cmp_eq_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x84,0x7c] +0x01,0x05,0x84,0x7c + +# W32: v_cmp_eq_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x42,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x65,0x04,0x02,0x00] 
+0x0a,0x00,0x42,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x42,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x42,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x42,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x42,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x42,0xd4,0xff,0x05,0x02,0x00 + +# W32: 
v_cmp_eq_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x42,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x42,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x42,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa4,0x7c] +# W64: v_cmp_eq_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa4,0x7c] +0x01,0x05,0xa4,0x7c + +# W32: v_cmp_eq_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x52,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x52,0xd4,0x01,0x01,0x01,0x00 + +# 
W32: v_cmp_eq_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x52,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x52,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x52,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x52,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x52,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x52,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x52,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x52,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x52,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x74,0x7c] +# W64: v_cmp_eq_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x74,0x7c] +0x01,0x05,0x74,0x7c + +# W32: v_cmp_eq_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x80,0x04,0x02,0x00] 
+0x0a,0x00,0x3a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00 + +# W32: 
v_cmp_eq_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x3a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x3a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x3a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x94,0x7c] +# W64: v_cmp_eq_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x94,0x7c] +0x01,0x05,0x94,0x7c + +# W32: v_cmp_eq_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x65,0x04,0x02,0x00] +# W64: 
v_cmp_eq_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x4a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], v255, v2 ; encoding: 
[0x0a,0x00,0x4a,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb4,0x7c] +# W64: v_cmp_eq_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb4,0x7c] +0x01,0x05,0xb4,0x7c + +# W32: v_cmp_eq_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], 0 ; 
encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x5a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_eq_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_eq_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_eq_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x00,0x7c] +# W64: v_cmp_f_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x00,0x7c] +0x01,0x05,0x00,0x7c + +# W32: v_cmp_f_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x00,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 
s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x00,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x00,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x00,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x00,0xd4,0x01,0x05,0x02,0x00] 
+0x0a,0x80,0x00,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_f_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_f_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_f_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_f_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_f_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_f_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x00,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x00,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x00,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x00,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x00,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x00,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c] +# W64: v_cmp_f_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x20,0x7c] +0x01,0x05,0x20,0x7c + +# W32: v_cmp_f_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x10,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xf7,0x04,0x02,0x00] +# W64: 
v_cmp_f_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x10,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x10,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, v255 ; encoding: 
[0x0a,0x00,0x10,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x10,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x10,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x10,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_f_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_f_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_f_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_f_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_f_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_f_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x10,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x10,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x10,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x10,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x10,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x10,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x40,0x7c] +# W64: v_cmp_f_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x40,0x7c] +0x01,0x05,0x40,0x7c + +# W32: v_cmp_f_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x20,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x80,0x04,0x02,0x00 + +# 
W32: v_cmp_f_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x20,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x20,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x20,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x20,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x20,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: 
[0x0a,0x80,0x20,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x20,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_f_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_f_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_f_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x20,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x20,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x20,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x20,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x20,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x20,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x80,0x7c] +# W64: v_cmp_f_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x80,0x7c] +0x01,0x05,0x80,0x7c + +# W32: v_cmp_f_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x40,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], -4.0, v2 ; encoding: 
[0x0a,0x00,0x40,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x40,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x40,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xff,0x03,0x00] 
+0x0a,0x00,0x40,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x40,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x40,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x40,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x40,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x40,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x40,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa0,0x7c] +# W64: v_cmp_f_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa0,0x7c] +0x01,0x05,0xa0,0x7c + +# W32: v_cmp_f_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x50,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x64,0x04,0x02,0x00 + +# W32: 
v_cmp_f_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x50,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x50,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x50,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x50,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x50,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x50,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x50,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x50,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[10:11], vcc, v[2:3] ; encoding: 
[0x0a,0x00,0x50,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x50,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x50,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x90,0x7c] +# W64: v_cmp_f_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x90,0x7c] +0x01,0x05,0x90,0x7c + +# W32: v_cmp_f_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x48,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x48,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x48,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, -4.0 ; encoding: 
[0x0a,0x00,0x48,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x48,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x48,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x48,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x48,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x48,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x48,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb0,0x7c] +# W64: v_cmp_f_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb0,0x7c] +0x01,0x05,0xb0,0x7c + +# W32: v_cmp_f_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: 
[0x0a,0x00,0x58,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x58,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x58,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x58,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], s[4:5] ; encoding: 
[0x0a,0x00,0x58,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x58,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x58,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x58,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x58,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_f_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x58,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x58,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x58,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_f_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_f_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x58,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0c,0x7c] +# W64: v_cmp_ge_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0c,0x7c] +0x01,0x05,0x0c,0x7c + +# W32: v_cmp_ge_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x06,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, s101, v2 ; encoding: 
[0x0a,0x00,0x06,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x06,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x06,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x06,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x06,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x06,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ge_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ge_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_ge_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ge_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ge_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xd7,0x00,0x00] +# W64: 
v_cmp_ge_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x06,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x06,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x06,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x06,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x06,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x06,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2c,0x7c] +# W64: v_cmp_ge_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2c,0x7c] +0x01,0x05,0x2c,0x7c + +# W32: v_cmp_ge_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x16,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x65,0x04,0x02,0x00] 
+0x0a,0x00,0x16,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x16,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x16,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x16,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x16,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x16,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ge_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ge_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x40 + +# W32: 
v_cmp_ge_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ge_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ge_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x16,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x16,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x16,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x16,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x16,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x16,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4c,0x7c] +# W64: v_cmp_ge_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4c,0x7c] +0x01,0x05,0x4c,0x7c + +# W32: v_cmp_ge_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x26,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 
s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x26,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x26,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x26,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x26,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x26,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x26,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x26,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ge_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_ge_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ge_f64_e64 s[10:11], 
-v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x26,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x26,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x26,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x26,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x26,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x26,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x6c,0x7c] +# W64: v_cmp_ge_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x6c,0x7c] +0x01,0x05,0x6c,0x7c + +# W32: v_cmp_ge_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x36,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, 0 ; encoding: 
[0x0a,0x00,0x36,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x36,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x36,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x36,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x36,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x36,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x36,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x36,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x36,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8c,0x7c] +# W64: v_cmp_ge_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x8c,0x7c] +0x01,0x05,0x8c,0x7c + +# W32: 
v_cmp_ge_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x46,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x46,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x46,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, exec_lo ; encoding: 
[0x0a,0x00,0x46,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x46,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x46,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x46,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x46,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x46,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x46,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xac,0x7c] +# W64: v_cmp_ge_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xac,0x7c] +0x01,0x05,0xac,0x7c + +# W32: v_cmp_ge_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x56,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 
s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x56,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x56,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x56,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x56,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ge_i64_e64 
s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x56,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x56,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x56,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x56,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x56,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x56,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x7c,0x7c] +# W64: v_cmp_ge_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x7c,0x7c] +0x01,0x05,0x7c,0x7c + +# W32: v_cmp_ge_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, 0 ; encoding: 
[0x0a,0x00,0x3e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x3e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x3e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x3e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9c,0x7c] +# W64: 
v_cmp_ge_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9c,0x7c] +0x01,0x05,0x9c,0x7c + +# W32: v_cmp_ge_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xff,0x00,0x00] 
+0x0a,0x00,0x4e,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x4e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbc,0x7c] +# W64: v_cmp_ge_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbc,0x7c] +0x01,0x05,0xbc,0x7c + +# W32: v_cmp_ge_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 
s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: 
[0x0a,0x00,0x5e,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x5e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ge_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ge_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ge_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x08,0x7c] +# W64: v_cmp_gt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x08,0x7c] +0x01,0x05,0x08,0x7c + +# W32: v_cmp_gt_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x04,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x02,0x00] 
+0x0a,0x00,0x04,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x04,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x04,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x04,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x04,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x04,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_gt_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_gt_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_gt_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_gt_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_gt_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x04,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x04,0xd4,0x01,0xd5,0x00,0x00 + 
+# W32: v_cmp_gt_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x04,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x04,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x04,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x04,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x28,0x7c] +# W64: v_cmp_gt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x28,0x7c] +0x01,0x05,0x28,0x7c + +# W32: v_cmp_gt_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x14,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xe1,0x01,0x00] +# W64: 
v_cmp_gt_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x14,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x14,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x14,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x14,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x14,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_gt_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_gt_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_gt_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_gt_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_gt_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, vcc_hi ; encoding: 
[0x0a,0x00,0x14,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x14,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x14,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x14,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x14,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x14,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x14,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x48,0x7c] +# W64: v_cmp_gt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x48,0x7c] +0x01,0x05,0x48,0x7c + +# W32: v_cmp_gt_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x24,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: 
[0x0a,0x00,0x24,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x24,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x24,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x24,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x24,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x24,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x24,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x24,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_gt_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_gt_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_gt_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_gt_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xfd,0x03,0x00 + +# W32: 
v_cmp_gt_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x24,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x24,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x24,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x24,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x24,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x24,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x68,0x7c] +# W64: v_cmp_gt_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x68,0x7c] +0x01,0x05,0x68,0x7c + +# W32: v_cmp_gt_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x34,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x34,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x34,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, exec_hi ; encoding: 
[0x0a,0x00,0x34,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x34,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x34,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x34,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x34,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x34,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x34,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x88,0x7c] +# W64: v_cmp_gt_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x88,0x7c] +0x01,0x05,0x88,0x7c + +# W32: v_cmp_gt_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[100:101], v1, v2 ; encoding: 
[0x64,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x44,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x44,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x44,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xfb,0x00,0x00] 
+0x0a,0x00,0x44,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x44,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x44,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x44,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x44,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x44,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x44,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa8,0x7c] +# W64: v_cmp_gt_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa8,0x7c] +0x01,0x05,0xa8,0x7c + +# W32: v_cmp_gt_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x54,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0xc1,0x04,0x02,0x00 + +# W32: 
v_cmp_gt_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x54,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x54,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x54,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x54,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], 
v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x54,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x54,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x54,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x54,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x54,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x54,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x78,0x7c] +# W64: v_cmp_gt_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x78,0x7c] +0x01,0x05,0x78,0x7c + +# W32: v_cmp_gt_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x83,0x01,0x00] 
+0x0a,0x00,0x3c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x3c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x3c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x3c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x98,0x7c] +# W64: v_cmp_gt_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x98,0x7c] +0x01,0x05,0x98,0x7c + +# W32: v_cmp_gt_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s100, v1, v2 ; encoding: 
[0x64,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xfb,0x00,0x00] +# W64: 
v_cmp_gt_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x4c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb8,0x7c] +# W64: v_cmp_gt_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb8,0x7c] +0x01,0x05,0xb8,0x7c + +# W32: v_cmp_gt_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], -1, v[2:3] ; encoding: 
[0x0a,0x00,0x5c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], v[254:255] 
; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x5c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_gt_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_gt_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_gt_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x06,0x7c] +# W64: v_cmp_le_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x06,0x7c] +0x01,0x05,0x06,0x7c + +# W32: v_cmp_le_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x03,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x03,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x83,0x01,0x00] +# W64: 
v_cmp_le_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x03,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x03,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x03,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x03,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_le_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_le_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_le_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_le_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_le_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_le_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x03,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x03,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x03,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], 
vcc_hi, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x03,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x03,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x03,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x26,0x7c] +# W64: v_cmp_le_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x26,0x7c] +0x01,0x05,0x26,0x7c + +# W32: v_cmp_le_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x13,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x13,0xd4,0x01,0x01,0x01,0x00 + +# W32: 
v_cmp_le_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x13,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x13,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x13,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x13,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_le_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_le_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_le_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_le_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_le_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_le_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x13,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x13,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_f32_e64 s10, v255, v2 ; 
encoding: [0x0a,0x00,0x13,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x13,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x13,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x13,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x13,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x46,0x7c] +# W64: v_cmp_le_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x46,0x7c] +0x01,0x05,0x46,0x7c + +# W32: v_cmp_le_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x23,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], 
0 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x23,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x23,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x23,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x23,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x23,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x23,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x23,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_le_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_le_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_le_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x23,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x23,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], 
v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x23,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x23,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x23,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x23,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x66,0x7c] +# W64: v_cmp_le_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x66,0x7c] +0x01,0x05,0x66,0x7c + +# W32: v_cmp_le_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x33,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x33,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x33,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xfd,0x00,0x00] 
+0x0a,0x00,0x33,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x33,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x33,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x33,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x33,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x33,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x33,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x86,0x7c] +# W64: v_cmp_le_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x86,0x7c] +0x01,0x05,0x86,0x7c + +# W32: v_cmp_le_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x43,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, -1, v2 ; encoding: 
[0x0a,0x00,0x43,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x43,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x43,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x00,0x00] +# 
W64: v_cmp_le_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x43,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x43,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x43,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x43,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x43,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x43,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa6,0x7c] +# W64: v_cmp_le_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa6,0x7c] +0x01,0x05,0xa6,0x7c + +# W32: v_cmp_le_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x53,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], exec, v[2:3] ; 
encoding: [0x0a,0x00,0x53,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x53,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x53,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x53,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x53,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_le_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x53,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x53,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_i64_e64 s10, 
v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x53,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x53,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x53,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x53,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x76,0x7c] +# W64: v_cmp_le_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x76,0x7c] +0x01,0x05,0x76,0x7c + +# W32: v_cmp_le_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xfd,0x00,0x00] +# W64: 
v_cmp_le_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x3b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x3b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x3b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x96,0x7c] +# W64: v_cmp_le_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x96,0x7c] +0x01,0x05,0x96,0x7c + +# W32: v_cmp_le_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x80,0x04,0x02,0x00] 
+0x0a,0x00,0x4b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xcb,0x00,0x00 + +# 
W32: v_cmp_le_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x4b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb6,0x7c] +# W64: v_cmp_le_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb6,0x7c] +0x01,0x05,0xb6,0x7c + +# W32: v_cmp_le_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, exec, v[2:3] ; encoding: 
[0x0a,0x00,0x5b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[1:2], vcc ; encoding: 
[0x0a,0x00,0x5b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_le_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_le_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_le_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0a,0x7c] +# W64: v_cmp_lg_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0a,0x7c] +0x01,0x05,0x0a,0x7c + +# W32: v_cmp_lg_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x05,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x05,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x05,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xff,0x00,0x00 
+ +# W32: v_cmp_lg_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x05,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x05,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x05,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lg_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lg_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lg_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lg_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lg_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x05,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x05,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lg_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x05,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x05,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x05,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lg_f16_e64 
s12, v1, v2 ; encoding: [0x0c,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x05,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2a,0x7c] +# W64: v_cmp_lg_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2a,0x7c] +0x01,0x05,0x2a,0x7c + +# W32: v_cmp_lg_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x15,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x15,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x15,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, -4.0 ; encoding: 
[0x0a,0x00,0x15,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x15,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x15,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x15,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lg_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lg_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lg_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lg_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lg_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x15,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x15,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lg_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x15,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], vcc_hi, v2 ; encoding: 
[0x0a,0x00,0x15,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x15,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x15,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lg_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x15,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4a,0x7c] +# W64: v_cmp_lg_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4a,0x7c] +0x01,0x05,0x4a,0x7c + +# W32: v_cmp_lg_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x25,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x25,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], -1 ; 
encoding: [0x0a,0x00,0x25,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x25,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x25,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x25,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x25,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x25,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x25,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lg_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lg_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lg_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x25,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x25,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lg_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x25,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x25,0xd4,0x6a,0x04,0x02,0x00] 
+0x0a,0x00,0x25,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lg_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lg_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x25,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x02,0x7c] +# W64: v_cmp_lt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x02,0x7c] +0x01,0x05,0x02,0x7c + +# W32: v_cmp_lt_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x01,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x01,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x01,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, s101 ; encoding: 
[0x0a,0x00,0x01,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x01,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x01,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x01,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lt_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lt_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lt_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lt_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lt_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x01,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x01,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x01,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x01,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x01,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x01,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] +# W64: v_cmp_lt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x22,0x7c] +0x01,0x05,0x22,0x7c + +# W32: v_cmp_lt_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], 0.5, v2 ; encoding: 
[0x0a,0x00,0x11,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x11,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x11,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x11,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xfd,0x00,0x00] 
+0x0a,0x00,0x11,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x11,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x11,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x11,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lt_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lt_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lt_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lt_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lt_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x11,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x11,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x11,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x11,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x11,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x11,0xd4,0x01,0x05,0x02,0x00 
+ +# W32: v_cmp_lt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x42,0x7c] +# W64: v_cmp_lt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x42,0x7c] +0x01,0x05,0x42,0x7c + +# W32: v_cmp_lt_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x21,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x21,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x21,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x21,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x21,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xfd,0x00,0x00] 
+0x0a,0x00,0x21,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x21,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x21,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x21,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x21,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x21,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x21,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_lt_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_lt_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_lt_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x21,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x21,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x21,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x21,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x21,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x21,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x21,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x62,0x7c] +# W64: v_cmp_lt_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x62,0x7c] +0x01,0x05,0x62,0x7c + +# W32: v_cmp_lt_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +# W64: 
v_cmp_lt_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x31,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x31,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x31,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, v255 ; encoding: 
[0x0a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x31,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x31,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x31,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x31,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x31,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x31,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x82,0x7c] +# W64: v_cmp_lt_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x82,0x7c] +0x01,0x05,0x82,0x7c + +# W32: v_cmp_lt_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x41,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x7e,0x04,0x02,0x00 + +# W32: 
v_cmp_lt_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x41,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x41,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v1, vcc_lo ; encoding: 
[0x0a,0x00,0x41,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x41,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x41,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x41,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x41,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x41,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x41,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa2,0x7c] +# W64: v_cmp_lt_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa2,0x7c] +0x01,0x05,0xa2,0x7c + +# W32: v_cmp_lt_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x51,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], 0.5 ; 
encoding: [0x0a,0x00,0x51,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x51,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x51,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x51,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x51,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x51,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x51,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x51,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x51,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x51,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x51,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x72,0x7c] +# W64: v_cmp_lt_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x72,0x7c] +0x01,0x05,0x72,0x7c + +# 
W32: v_cmp_lt_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x39,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x39,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x39,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, v255 ; encoding: 
[0x0a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x39,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x39,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x39,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x39,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x39,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x39,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x92,0x7c] +# W64: v_cmp_lt_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x92,0x7c] +0x01,0x05,0x92,0x7c + +# W32: v_cmp_lt_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x49,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], exec_lo, v2 ; encoding: 
[0x0a,0x00,0x49,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x49,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x49,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xd7,0x00,0x00] 
+0x0a,0x00,0x49,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x49,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x49,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x49,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x49,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x49,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x49,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb2,0x7c] +# W64: v_cmp_lt_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb2,0x7c] +0x01,0x05,0xb2,0x7c + +# W32: v_cmp_lt_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x59,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x04,0x04,0x02,0x00] 
+0x0a,0x00,0x59,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x59,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x59,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x59,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x59,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x59,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x59,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_lt_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x59,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x59,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x59,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_lt_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_lt_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x59,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x6a,0x7c] +# W64: 
v_cmp_ne_i16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x6a,0x7c] +0x01,0x05,0x6a,0x7c + +# W32: v_cmp_ne_i16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x35,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x35,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x35,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x00,0x00] 
+0x0a,0x00,0x35,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x35,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x35,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_i16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x35,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x35,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x35,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_i16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x35,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8a,0x7c] +# W64: v_cmp_ne_i32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x8a,0x7c] +0x01,0x05,0x8a,0x7c + +# W32: v_cmp_ne_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x45,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, exec_lo, v2 ; encoding: 
[0x0a,0x00,0x45,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x45,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x45,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xd7,0x00,0x00] +# W64: 
v_cmp_ne_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x45,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x45,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x45,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x45,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x45,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x45,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xaa,0x7c] +# W64: v_cmp_ne_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xaa,0x7c] +0x01,0x05,0xaa,0x7c + +# W32: v_cmp_ne_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x55,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x04,0x04,0x02,0x00] +# W64: 
v_cmp_ne_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x55,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x55,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x55,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x55,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x55,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x55,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x55,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x55,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x55,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] 
+0x0c,0x00,0x55,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1a,0x7c] +# W64: v_cmp_neq_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1a,0x7c] +0x01,0x05,0x1a,0x7c + +# W32: v_cmp_neq_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, s2 ; 
encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_neq_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_neq_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_neq_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_neq_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_neq_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_neq_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_neq_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3a,0x7c] +# W64: v_cmp_neq_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3a,0x7c] +0x01,0x05,0x3a,0x7c + +# W32: v_cmp_neq_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +# W64: 
v_cmp_neq_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_neq_f32_e64 
s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_neq_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_neq_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_neq_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_neq_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_neq_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_neq_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_neq_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5a,0x7c] +# W64: v_cmp_neq_f64_e32 vcc, v[1:2], v[2:3] 
; encoding: [0x01,0x05,0x5a,0x7c] +0x01,0x05,0x5a,0x7c + +# W32: v_cmp_neq_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], s[100:101] ; encoding: 
[0x0a,0x00,0x2d,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_neq_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_neq_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_neq_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_neq_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_neq_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_neq_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x7a,0x7c] +# W64: v_cmp_ne_u16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x7a,0x7c] +0x01,0x05,0x7a,0x7c + +# W32: v_cmp_ne_u16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] 
+0x64,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00 + +# W32: 
v_cmp_ne_u16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x3d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x3d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_u16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x3d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_u16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9a,0x7c] +# W64: v_cmp_ne_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9a,0x7c] +0x01,0x05,0x9a,0x7c + +# W32: v_cmp_ne_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0x04,0x02,0x00] +# W64: 
v_cmp_ne_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v1, vcc_lo ; encoding: 
[0x0a,0x00,0x4d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xba,0x7c] +# W64: v_cmp_ne_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xba,0x7c] +0x01,0x05,0xba,0x7c + +# W32: v_cmp_ne_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], 0.5 ; 
encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x5d,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5d,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ne_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5d,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ne_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ne_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x12,0x7c] +# W64: v_cmp_nge_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x12,0x7c] +0x01,0x05,0x12,0x7c + +# W32: v_cmp_nge_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +# W64: 
v_cmp_nge_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x09,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x09,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x09,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nge_f16_e64 
s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x09,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x09,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x09,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nge_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nge_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nge_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nge_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nge_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x09,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x09,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nge_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x09,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x09,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x09,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nge_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x09,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x32,0x7c] +# W64: v_cmp_nge_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x32,0x7c] +0x01,0x05,0x32,0x7c + +# W32: v_cmp_nge_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x19,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x80,0x04,0x02,0x00] 
+0x0a,0x00,0x19,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x19,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x19,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xcb,0x00,0x00] 
+0x0a,0x00,0x19,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x19,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x19,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x19,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nge_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nge_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nge_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nge_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nge_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x19,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x19,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nge_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x19,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x19,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x19,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nge_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x19,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x52,0x7c] +# W64: v_cmp_nge_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x52,0x7c] +0x01,0x05,0x52,0x7c + +# W32: v_cmp_nge_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0xf0,0x04,0x02,0x00 + +# W32: 
v_cmp_nge_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x29,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x29,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x29,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_nge_f64_e64 
s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x29,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x29,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x29,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x29,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x29,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nge_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nge_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nge_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x29,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x29,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nge_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x29,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x29,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x29,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nge_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nge_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x29,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x16,0x7c] +# W64: v_cmp_ngt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x16,0x7c] +0x01,0x05,0x16,0x7c + +# W32: v_cmp_ngt_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, -1, v2 ; 
encoding: [0x0a,0x00,0x0b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, v2 ; encoding: 
[0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ngt_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ngt_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_ngt_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ngt_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ngt_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x36,0x7c] +# W64: v_cmp_ngt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x36,0x7c] +0x01,0x05,0x36,0x7c + +# W32: v_cmp_ngt_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], -4.0, v2 
; encoding: [0x0a,0x00,0x1b,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, v255 ; encoding: 
[0x0a,0x00,0x1b,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ngt_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ngt_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_ngt_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ngt_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ngt_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x56,0x7c] +# W64: v_cmp_ngt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x56,0x7c] +0x01,0x05,0x56,0x7c + +# W32: v_cmp_ngt_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], 0, v[2:3] ; encoding: 
[0x0a,0x00,0x2b,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_ngt_f64_e64 
s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_ngt_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_ngt_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_ngt_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2b,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2b,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2b,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_ngt_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_ngt_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x18,0x7c] +# W64: v_cmp_nle_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x18,0x7c] +0x01,0x05,0x18,0x7c + +# W32: v_cmp_nle_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7f,0x04,0x02,0x00] 
+0x0a,0x00,0x0c,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nle_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x20] 
+0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nle_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nle_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nle_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nle_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nle_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nle_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x38,0x7c] +# W64: v_cmp_nle_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x38,0x7c] +0x01,0x05,0x38,0x7c + +# W32: v_cmp_nle_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, 
exec_lo, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, v2 ; 
encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nle_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nle_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nle_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nle_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nle_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nle_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nle_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x58,0x7c] +# W64: v_cmp_nle_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x58,0x7c] +0x01,0x05,0x58,0x7c + +# W32: v_cmp_nle_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, -4.0, v[2:3] ; encoding: 
[0x0a,0x00,0x2c,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3] 
; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nle_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nle_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nle_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2c,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2c,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nle_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2c,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nle_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nle_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x14,0x7c] +# W64: v_cmp_nlg_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x14,0x7c] +0x01,0x05,0x14,0x7c + +# W32: v_cmp_nlg_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0x04,0x02,0x00] +# W64: 
v_cmp_nlg_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlg_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlg_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlg_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlg_f16_e64 
s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlg_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x34,0x7c] +# W64: v_cmp_nlg_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x34,0x7c] +0x01,0x05,0x34,0x7c + +# W32: v_cmp_nlg_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0x04,0x02,0x00] 
+0x0a,0x00,0x1a,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlg_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x20] 
+0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlg_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlg_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlg_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlg_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x54,0x7c] +# W64: v_cmp_nlg_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x54,0x7c] +0x01,0x05,0x54,0x7c + +# W32: v_cmp_nlg_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x7e,0x04,0x02,0x00] 
+0x0a,0x00,0x2a,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlg_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], 
-v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlg_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlg_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2a,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2a,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2a,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlg_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlg_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1c,0x7c] +# W64: v_cmp_nlt_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1c,0x7c] +0x01,0x05,0x1c,0x7c + +# W32: v_cmp_nlt_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 
s10, s1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlt_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlt_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlt_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlt_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlt_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v1, vcc_lo ; 
encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3c,0x7c] +# W64: v_cmp_nlt_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3c,0x7c] +0x01,0x05,0x3c,0x7c + +# W32: v_cmp_nlt_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 
s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlt_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlt_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlt_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlt_f32_e64 s[10:11], -v1, -v2 ; 
encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlt_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5c,0x7c] +# W64: v_cmp_nlt_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5c,0x7c] +0x01,0x05,0x5c,0x7c + +# W32: v_cmp_nlt_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 
s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_nlt_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_nlt_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_nlt_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x60] 
+0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2e,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2e,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2e,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_nlt_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_nlt_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x0e,0x7c] +# W64: v_cmp_o_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x0e,0x7c] +0x01,0x05,0x0e,0x7c + +# W32: v_cmp_o_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x07,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x07,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_o_f16_e64 
s10, v1, -1 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x07,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x07,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x07,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x07,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_o_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_o_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_o_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_o_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_o_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_o_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x07,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x07,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_o_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x07,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, vcc_hi, v2 ; encoding: 
[0x0a,0x00,0x07,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x07,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x07,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_o_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x07,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x2e,0x7c] +# W64: v_cmp_o_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x2e,0x7c] +0x01,0x05,0x2e,0x7c + +# W32: v_cmp_o_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x17,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x01,0x01,0x00] 
+0x0a,0x00,0x17,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x17,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x17,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x17,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x17,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_o_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_o_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_o_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_o_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_o_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_o_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_o_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x17,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x17,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_o_f32_e64 
s10, v255, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x17,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x17,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x17,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_o_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x17,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4e,0x7c] +# W64: v_cmp_o_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4e,0x7c] +0x01,0x05,0x4e,0x7c + +# W32: v_cmp_o_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x27,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], 0 ; encoding: 
[0x0a,0x00,0x27,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x27,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x27,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x27,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x27,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x27,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x27,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x27,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_o_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_o_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_o_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x27,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x27,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_o_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: 
[0x0a,0x00,0x27,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x27,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x27,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x27,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_o_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_o_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x27,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xfb,0x00,0x00] 
+0x0a,0x00,0x0f,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x0f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x0f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x0f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_t_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_t_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_t_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_t_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_t_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_t_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x0f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x0f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x0f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x1e,0x7c] +# W64: v_cmp_t_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x1e,0x7c] +0x01,0x05,0x1e,0x7c + +# W32: v_cmp_t_f32_e64 s10, 0.5, v2 ; encoding: 
[0x0a,0x00,0x1f,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, 
exec_lo ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x1f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x1f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x1f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_t_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_t_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_t_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_t_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_t_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_t_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x1f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x1f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x1f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] 
+0x0c,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x3e,0x7c] +# W64: v_cmp_t_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x3e,0x7c] +0x01,0x05,0x3e,0x7c + +# W32: v_cmp_t_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x00,0x00] 
+0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x2f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x2f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_t_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_t_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_t_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x2f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x2f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x2f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5e,0x7c] +# W64: v_cmp_t_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5e,0x7c] +0x01,0x05,0x5e,0x7c + +# W32: v_cmp_t_i32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x8e,0x7c] +# W64: v_cmp_t_i32_e32 vcc, v1, v2 ; encoding: 
[0x01,0x05,0x8e,0x7c] +0x01,0x05,0x8e,0x7c + +# W32: v_cmp_t_i32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x47,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x47,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x47,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, exec_lo ; 
encoding: [0x0a,0x00,0x47,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x47,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x47,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_i32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x47,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x47,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x47,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_i32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x47,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_i64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xae,0x7c] +# W64: v_cmp_t_i64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xae,0x7c] +0x01,0x05,0xae,0x7c + +# W32: v_cmp_t_i64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x57,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], 0, v[2:3] ; 
encoding: [0x0a,0x00,0x57,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x57,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x57,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x57,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x57,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], v[2:3] ; encoding: 
[0x0a,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x57,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x57,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_i64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x57,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x57,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x57,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_i64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_i64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x57,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x9e,0x7c] +# W64: v_cmp_t_u32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x9e,0x7c] +0x01,0x05,0x9e,0x7c + +# W32: v_cmp_t_u32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], -4.0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], 
s101, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x4f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x4f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_u32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0xff,0x05,0x02,0x00] 
+0x0a,0x00,0x4f,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x4f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x4f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_u32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbe,0x7c] +# W64: v_cmp_t_u64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbe,0x7c] +0x01,0x05,0xbe,0x7c + +# W32: v_cmp_t_u64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, -1, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x01,0x01,0x00] 
+0x0a,0x00,0x5f,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x5f,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x5f,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_t_u64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x5f,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x5f,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_t_u64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_t_u64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x10,0x7c] +# W64: v_cmp_u_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x10,0x7c] +0x01,0x05,0x10,0x7c + +# W32: v_cmp_u_f16_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x08,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x80,0x04,0x02,0x00] 
+0x0a,0x00,0x08,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x08,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x08,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x08,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x08,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x08,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, 
v1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_u_f16_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_u_f16_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_u_f16_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_u_f16_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_u_f16_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_u_f16_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x08,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x08,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_u_f16_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x08,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x08,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x08,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_u_f16_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f16_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x08,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f32_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0x30,0x7c] +# W64: v_cmp_u_f32_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0x30,0x7c] +0x01,0x05,0x30,0x7c + +# W32: v_cmp_u_f32_e64 s10, 0.5, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], 0.5, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s100, v1, v2 ; encoding: [0x64,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[100:101], v1, v2 ; encoding: [0x64,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x18,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, 0, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], 0, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, -1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], -1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, -4.0, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], -4.0, v2 ; encoding: 
[0x0a,0x00,0x18,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, exec_hi, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7f,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], exec_hi, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7f,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x7f,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, exec_lo, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], exec_lo, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, m0, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7d,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], m0, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x7d,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x7d,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, s101, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x65,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], s101, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x65,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x65,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, s1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], s1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x01,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, 0.5 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, 0.5 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, 0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, 0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x18,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, -1 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, -1 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x18,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, -4.0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, -4.0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, exec_hi ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xff,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, exec_hi ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xff,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xff,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, exec_lo ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, exec_lo ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, m0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xfb,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, m0 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xfb,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xfb,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, s101 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xcb,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, s101 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xcb,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xcb,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, s2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, s2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0x05,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, v255 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xff,0x03,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, v255 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xff,0x03,0x00] 
+0x0a,0x00,0x18,0xd4,0x01,0xff,0x03,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, v2 clamp ; encoding: [0x0a,0x80,0x18,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, v2 clamp ; encoding: [0x0a,0x80,0x18,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x80,0x18,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, -v1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_u_f32_e64 s[10:11], -v1, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_u_f32_e64 s10, v1, -v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_u_f32_e64 s[10:11], v1, -v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_u_f32_e64 s10, -v1, -v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_u_f32_e64 s[10:11], -v1, -v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_u_f32_e64 s10, v1, vcc_hi ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xd7,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, vcc_hi ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xd7,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xd7,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v1, vcc_lo ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v1, vcc_lo ; encoding: [0x0a,0x00,0x18,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x18,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_u_f32_e64 s10, v255, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xff,0x05,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], v255, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0xff,0x05,0x02,0x00] +0x0a,0x00,0x18,0xd4,0xff,0x05,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, vcc_hi, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x6b,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], vcc_hi, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x6b,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x6b,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s10, vcc_lo, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[10:11], vcc_lo, v2 ; encoding: [0x0a,0x00,0x18,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x18,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_u_f32_e64 s12, v1, v2 ; encoding: [0x0c,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f32_e64 s[12:13], v1, v2 ; encoding: [0x0c,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x18,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f64_e32 vcc_lo, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x50,0x7c] +# W64: v_cmp_u_f64_e32 vcc, v[1:2], v[2:3] ; encoding: [0x01,0x05,0x50,0x7c] +0x01,0x05,0x50,0x7c + +# W32: v_cmp_u_f64_e64 s10, 0.5, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xf0,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], 0.5, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xf0,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0xf0,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s100, v[1:2], v[2:3] ; encoding: [0x64,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[100:101], v[1:2], v[2:3] ; encoding: [0x64,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +0x64,0x00,0x28,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, 0, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x80,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], 0, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x80,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x80,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, -1, v[2:3] ; 
encoding: [0x0a,0x00,0x28,0xd4,0xc1,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], -1, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xc1,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0xc1,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, -4.0, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xf7,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], -4.0, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xf7,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0xf7,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, exec, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x7e,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], exec, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x7e,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x7e,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x64,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], s[100:101], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x64,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x64,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x02,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], s[2:3], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x02,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x02,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x04,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], s[4:5], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x04,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x04,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], 0.5 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xe1,0x01,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], 0.5 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xe1,0x01,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xe1,0x01,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], 0 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x01,0x01,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], 0 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x01,0x01,0x00] +0x0a,0x00,0x28,0xd4,0x01,0x01,0x01,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], -1 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x83,0x01,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], -1 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x83,0x01,0x00] +0x0a,0x00,0x28,0xd4,0x01,0x83,0x01,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], -4.0 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xef,0x01,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], -4.0 ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xef,0x01,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xef,0x01,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], exec ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xfd,0x00,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], exec ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xfd,0x00,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xfd,0x00,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xc9,0x00,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], s[100:101] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xc9,0x00,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xc9,0x00,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x09,0x00,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], s[4:5] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x09,0x00,0x00] +0x0a,0x00,0x28,0xd4,0x01,0x09,0x00,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x0d,0x00,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], s[6:7] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x0d,0x00,0x00] +0x0a,0x00,0x28,0xd4,0x01,0x0d,0x00,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x28,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] clamp ; encoding: [0x0a,0x80,0x28,0xd4,0x01,0x05,0x02,0x00] 
+0x0a,0x80,0x28,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x20] +# W64: v_cmp_u_f64_e64 s[10:11], -v[1:2], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x20] +0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x20 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x40] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x40] +0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x40 + +# W32: v_cmp_u_f64_e64 s10, -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x60] +# W64: v_cmp_u_f64_e64 s[10:11], -v[1:2], -v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x60] +0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xfd,0x03,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], v[254:255] ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xfd,0x03,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xfd,0x03,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[1:2], vcc ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xd5,0x00,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[1:2], vcc ; encoding: [0x0a,0x00,0x28,0xd4,0x01,0xd5,0x00,0x00] +0x0a,0x00,0x28,0xd4,0x01,0xd5,0x00,0x00 + +# W32: v_cmp_u_f64_e64 s10, v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xfe,0x05,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], v[254:255], v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0xfe,0x05,0x02,0x00] +0x0a,0x00,0x28,0xd4,0xfe,0x05,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s10, vcc, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x6a,0x04,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[10:11], vcc, v[2:3] ; encoding: [0x0a,0x00,0x28,0xd4,0x6a,0x04,0x02,0x00] +0x0a,0x00,0x28,0xd4,0x6a,0x04,0x02,0x00 + +# W32: v_cmp_u_f64_e64 s12, v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +# W64: v_cmp_u_f64_e64 s[12:13], v[1:2], v[2:3] ; encoding: [0x0c,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +0x0c,0x00,0x28,0xd4,0x01,0x05,0x02,0x00 +# GFX11: v_cmpx_class_f16_e32 0, v2 ; encoding: [0x80,0x04,0xfa,0x7d] +0x80,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0xfa,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0xfa,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0xfa,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0xfa,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0xfa,0x7d] +0xc1,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0xfa,0x7d] +0x7f,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0xfa,0x7d] +0x7e,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0xfa,0x7d] +0x7d,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 s101, v2 ; encoding: [0x65,0x04,0xfa,0x7d] +0x65,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 s1, v2 ; encoding: [0x01,0x04,0xfa,0x7d] +0x01,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] +0x01,0x05,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0xfa,0x7d] +0x6b,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0xfa,0x7d] +0x6a,0x04,0xfa,0x7d + +# GFX11: v_cmpx_class_f16_e64 v1, 0 ; encoding: 
[0x7e,0x00,0xfd,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_class_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_class_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xfd,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0xfc,0x7d] +0xf0,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 0, v2 ; encoding: [0x80,0x04,0xfc,0x7d] +0x80,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0xfc,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xfc,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_class_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0xfc,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xfc,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_class_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0xfc,0x7d] +0xc1,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0xfc,0x7d] +0xf7,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0xfc,0x7d] +0x7f,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0xfc,0x7d] +0x7e,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0xfc,0x7d] +0x7d,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 s101, v2 ; encoding: [0x65,0x04,0xfc,0x7d] +0x65,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 s1, v2 ; encoding: [0x01,0x04,0xfc,0x7d] +0x01,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 v1, v255 ; encoding: [0x01,0xff,0xfd,0x7d] +0x01,0xff,0xfd,0x7d + +# GFX11: v_cmpx_class_f32_e32 v1, v2 ; encoding: [0x01,0x05,0xfc,0x7d] +0x01,0x05,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 v255, v2 ; encoding: [0xff,0x05,0xfc,0x7d] +0xff,0x05,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0xfc,0x7d] +0x6b,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0xfc,0x7d] +0x6a,0x04,0xfc,0x7d + +# GFX11: v_cmpx_class_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, exec_hi ; 
encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_class_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_class_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xfe,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e32 0.5, v2 ; encoding: [0xf0,0x04,0xfe,0x7d] +0xf0,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 0, v2 ; encoding: [0x80,0x04,0xfe,0x7d] +0x80,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0xfe,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xfe,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_class_f64_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0xfe,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xfe,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_class_f64_e32 -1, v2 ; encoding: [0xc1,0x04,0xfe,0x7d] +0xc1,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 -4.0, v2 ; encoding: [0xf7,0x04,0xfe,0x7d] +0xf7,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 exec, v2 ; encoding: [0x7e,0x04,0xfe,0x7d] +0x7e,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 s[100:101], v2 ; encoding: [0x64,0x04,0xfe,0x7d] +0x64,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 s[2:3], v2 ; encoding: [0x02,0x04,0xfe,0x7d] +0x02,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 s[4:5], v2 ; encoding: [0x04,0x04,0xfe,0x7d] +0x04,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 v[1:2], v255 ; encoding: [0x01,0xff,0xff,0x7d] +0x01,0xff,0xff,0x7d + +# GFX11: v_cmpx_class_f64_e32 v[1:2], v2 ; encoding: [0x01,0x05,0xfe,0x7d] +0x01,0x05,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 v[254:255], v2 ; encoding: [0xfe,0x05,0xfe,0x7d] +0xfe,0x05,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e32 vcc, v2 ; encoding: [0x6a,0x04,0xfe,0x7d] +0x6a,0x04,0xfe,0x7d + +# GFX11: v_cmpx_class_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xff,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xff,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], exec_hi ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], exec_lo ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], m0 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], s101 ; 
encoding: [0x7e,0x00,0xff,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], s2 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 -v[1:2], v2 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xff,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], vcc_hi ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_class_f64_e64 v[1:2], vcc_lo ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xff,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e32 0, v2 ; encoding: [0x80,0x04,0x04,0x7d] +0x80,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x04,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x04,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x04,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x04,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x04,0x7d] +0xc1,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x04,0x7d] +0x7f,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x04,0x7d] +0x7e,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x04,0x7d] +0x7d,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x04,0x7d] +0x65,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x04,0x7d] +0x01,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x04,0x7d] +0x01,0x05,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x04,0x7d] +0x6b,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x04,0x7d] +0x6a,0x04,0x04,0x7d + +# GFX11: v_cmpx_eq_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x82,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x82,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_eq_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_eq_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_eq_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x82,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x24,0x7d] 
+0xf0,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 0, v2 ; encoding: [0x80,0x04,0x24,0x7d] +0x80,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x24,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x24,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x24,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x24,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_eq_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x24,0x7d] +0xc1,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x24,0x7d] +0xf7,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x24,0x7d] +0x7f,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x24,0x7d] +0x7e,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x24,0x7d] +0x7d,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x24,0x7d] +0x65,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x24,0x7d] +0x01,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x25,0x7d] +0x01,0xff,0x25,0x7d + +# GFX11: v_cmpx_eq_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x24,0x7d] +0x01,0x05,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x24,0x7d] +0xff,0x05,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x24,0x7d] +0x6b,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x24,0x7d] +0x6a,0x04,0x24,0x7d + +# GFX11: v_cmpx_eq_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x92,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x92,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_eq_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_eq_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_eq_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x92,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x44,0x7d] +0xf0,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 0, v[2:3] ; encoding: 
[0x80,0x04,0x44,0x7d] +0x80,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x44,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x44,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x44,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x44,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_eq_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x44,0x7d] +0xc1,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x44,0x7d] +0xf7,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x44,0x7d] +0x7e,0x02,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x44,0x7d] +0x7e,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x44,0x7d] +0x64,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x44,0x7d] +0x02,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x44,0x7d] +0x04,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x44,0x7d] +0x01,0x05,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x45,0x7d] +0x01,0xfd,0x45,0x7d + +# GFX11: v_cmpx_eq_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x44,0x7d] +0xfe,0x05,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x44,0x7d] +0x6a,0x04,0x44,0x7d + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_eq_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_eq_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_eq_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa2,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e32 0, v2 ; encoding: [0x80,0x04,0x64,0x7d] +0x80,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x64,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x64,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x64,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x64,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x64,0x7d] +0xc1,0x04,0x64,0x7d 
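
Each "# GFX11:" line above is a FileCheck expectation and the comma-separated bytes that follow it are the raw words handed to the disassembler. For the 4-byte `_e32` (VOPC) forms, the source-operand encodings exercised here follow a regular pattern; the sketch below is a minimal illustration of that pattern inferred from these test bytes (it is not the in-tree AMDGPUDisassembler, and the field layout is an assumption: SRC0 in bits [8:0], VSRC1 in bits [16:9], OP in bits [24:17]).

// Hedged sketch: name the SRC0 operand kinds seen in the GFX11 _e32 tests above.
#include <cstdint>
#include <cstdio>
#include <string>

static std::string decodeSrc0(uint32_t Word) {
  unsigned Src0 = Word & 0x1ff;                               // bits [8:0]
  if (Src0 >= 256) return "v" + std::to_string(Src0 - 256);   // VGPR
  if (Src0 <= 105) return "s" + std::to_string(Src0);         // SGPR
  switch (Src0) {
  case 106: return "vcc_lo";
  case 107: return "vcc_hi";
  case 125: return "m0";
  case 126: return "exec_lo";
  case 127: return "exec_hi";
  case 128: return "0";        // inline integer constant
  case 193: return "-1";
  case 240: return "0.5";      // inline float constant
  case 247: return "-4.0";
  case 255: return "<32-bit literal follows in the next dword>";
  default:  return "<other inline constant>";
  }
}

int main() {
  // "v_cmpx_eq_f16_e32 m0, v2" above is encoded as [0x7d,0x04,0x04,0x7d],
  // i.e. the little-endian dword 0x7d04047d.
  std::printf("%s\n", decodeSrc0(0x7d04047d).c_str());  // prints: m0
}

Under the same assumed layout, the OP field of that word is 0x82, which matches the third byte of the corresponding `v_cmpx_eq_f16_e64` checks above, consistent with the _e32 and _e64 forms sharing one opcode number.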
+ +# GFX11: v_cmpx_eq_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x64,0x7d] +0x7f,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x64,0x7d] +0x7e,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x64,0x7d] +0x7d,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x64,0x7d] +0x65,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x64,0x7d] +0x01,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x64,0x7d] +0x01,0x05,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x64,0x7d] +0x6b,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x64,0x7d] +0x6a,0x04,0x64,0x7d + +# GFX11: v_cmpx_eq_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb2,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x84,0x7d] +0xf0,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 0, v2 ; encoding: [0x80,0x04,0x84,0x7d] +0x80,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x84,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x84,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x84,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x84,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_eq_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x84,0x7d] +0xc1,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x84,0x7d] +0xf7,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x84,0x7d] +0x7f,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x84,0x7d] +0x7e,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x84,0x7d] +0x7d,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x84,0x7d] +0x65,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x84,0x7d] +0x01,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x85,0x7d] +0x01,0xff,0x85,0x7d + +# GFX11: v_cmpx_eq_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x84,0x7d] +0x01,0x05,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x84,0x7d] +0xff,0x05,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x84,0x7d] +0x6b,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x84,0x7d] 
+0x6a,0x04,0x84,0x7d + +# GFX11: v_cmpx_eq_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc2,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa4,0x7d] +0xf0,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xa4,0x7d] +0x80,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa4,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa4,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa4,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_eq_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xa4,0x7d] +0xc1,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa4,0x7d] +0xf7,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xa4,0x7d] +0x7e,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa4,0x7d] +0x64,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa4,0x7d] +0x02,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa4,0x7d] +0x04,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa4,0x7d] +0x01,0x05,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa5,0x7d] +0x01,0xfd,0xa5,0x7d + +# GFX11: v_cmpx_eq_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa4,0x7d] +0xfe,0x05,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xa4,0x7d] +0x6a,0x04,0xa4,0x7d + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_i64_e64 
v[1:2], exec ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_eq_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd2,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e32 0, v2 ; encoding: [0x80,0x04,0x74,0x7d] +0x80,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x74,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x74,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x74,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x74,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x74,0x7d] +0xc1,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x74,0x7d] +0x7f,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x74,0x7d] +0x7e,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x74,0x7d] +0x7d,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x74,0x7d] +0x65,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x74,0x7d] +0x01,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x74,0x7d] +0x01,0x05,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x74,0x7d] +0x6b,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x74,0x7d] +0x6a,0x04,0x74,0x7d + +# GFX11: v_cmpx_eq_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xba,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xba,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_u16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xba,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x94,0x7d] +0xf0,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 0, v2 ; encoding: [0x80,0x04,0x94,0x7d] +0x80,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x94,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x94,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x94,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x94,0x7d,0x56,0x34,0x12,0xaf + 
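
The 8-byte `_e64` (VOP3) checks above also exercise source modifiers: the "-v1, v2", "v1, -v2" and "-v1, -v2" variants differ only in the final byte (0x20, 0x40, 0x60). The following is a hedged sketch of how those bits could be pulled apart, inferred from these byte patterns rather than taken from the real decoder; it assumes the second dword carries SRC0 in bits [8:0], SRC1 in bits [17:9], and the NEG flags for src0/src1 in bits 29/30.

// Hedged sketch: extract src operands and neg modifiers from the second
// dword of a VOP3-encoded v_cmpx instruction, as seen in the tests above.
#include <cstdint>
#include <cstdio>

struct Vop3Srcs {
  unsigned Src0, Src1;
  bool NegSrc0, NegSrc1;
};

static Vop3Srcs decodeSecondDword(uint32_t Dword2) {
  Vop3Srcs S;
  S.Src0    = Dword2 & 0x1ff;         // bits [8:0]
  S.Src1    = (Dword2 >> 9) & 0x1ff;  // bits [17:9]
  S.NegSrc0 = (Dword2 >> 29) & 1;     // NEG flag for src0
  S.NegSrc1 = (Dword2 >> 30) & 1;     // NEG flag for src1
  return S;
}

int main() {
  // "v_cmpx_eq_f16_e64 -v1, -v2" above ends in the bytes
  // 0x01,0x05,0x02,0x60, i.e. the little-endian dword 0x60020501.
  Vop3Srcs S = decodeSecondDword(0x60020501);
  std::printf("src0=%u src1=%u neg0=%d neg1=%d\n",
              S.Src0, S.Src1, S.NegSrc0, S.NegSrc1);
  // Expected: src0=257 (v1), src1=258 (v2), neg0=1, neg1=1.
}

The 256-based values again denote VGPRs, so the non-negated "v1, s2" checks above (second dword bytes 0x01,0x05,0x00,0x00) decode to src0=257 (v1) and src1=2 (s2) with both NEG flags clear.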
+# GFX11: v_cmpx_eq_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x94,0x7d] +0xc1,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x94,0x7d] +0xf7,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x94,0x7d] +0x7f,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x94,0x7d] +0x7e,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x94,0x7d] +0x7d,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x94,0x7d] +0x65,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x94,0x7d] +0x01,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x95,0x7d] +0x01,0xff,0x95,0x7d + +# GFX11: v_cmpx_eq_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x94,0x7d] +0x01,0x05,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x94,0x7d] +0xff,0x05,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x94,0x7d] +0x6b,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x94,0x7d] +0x6a,0x04,0x94,0x7d + +# GFX11: v_cmpx_eq_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xca,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xca,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_eq_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xca,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_eq_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb4,0x7d] +0xf0,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xb4,0x7d] +0x80,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb4,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_eq_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb4,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb4,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_eq_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xb4,0x7d] +0xc1,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb4,0x7d] +0xf7,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xb4,0x7d] +0x7e,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb4,0x7d] +0x64,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; encoding: 
[0x02,0x04,0xb4,0x7d] +0x02,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb4,0x7d] +0x04,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb4,0x7d] +0x01,0x05,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb5,0x7d] +0x01,0xfd,0xb5,0x7d + +# GFX11: v_cmpx_eq_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb4,0x7d] +0xfe,0x05,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xb4,0x7d] +0x6a,0x04,0xb4,0x7d + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xda,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xda,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xda,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xda,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xda,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xda,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xda,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xda,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_eq_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xda,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e32 0, v2 ; encoding: [0x80,0x04,0x00,0x7d] +0x80,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x00,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x00,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x00,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x00,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x00,0x7d] +0xc1,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x00,0x7d] +0x7f,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x00,0x7d] +0x7e,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x00,0x7d] +0x7d,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x00,0x7d] +0x65,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x00,0x7d] +0x01,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x00,0x7d] +0x01,0x05,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x00,0x7d] +0x6b,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x00,0x7d] +0x6a,0x04,0x00,0x7d + +# GFX11: v_cmpx_f_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x80,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x80,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 
v1, m0 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_f_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_f_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x80,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_f_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_f_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x80,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x20,0x7d] +0xf0,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 0, v2 ; encoding: [0x80,0x04,0x20,0x7d] +0x80,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x20,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x20,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x20,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x20,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x20,0x7d] +0xc1,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x20,0x7d] +0xf7,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x20,0x7d] +0x7f,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x20,0x7d] +0x7e,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x20,0x7d] +0x7d,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x20,0x7d] +0x65,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x20,0x7d] +0x01,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x21,0x7d] +0x01,0xff,0x21,0x7d + +# GFX11: v_cmpx_f_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x20,0x7d] +0x01,0x05,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x20,0x7d] +0xff,0x05,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x20,0x7d] +0x6b,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x20,0x7d] +0x6a,0x04,0x20,0x7d + +# GFX11: v_cmpx_f_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x90,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x90,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: 
v_cmpx_f_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_f_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_f_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_f_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_f_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x90,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x40,0x7d] +0xf0,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x40,0x7d] +0x80,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x40,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x40,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x40,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x40,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x40,0x7d] +0xc1,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x40,0x7d] +0xf7,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x40,0x7d] +0x7e,0x02,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x40,0x7d] +0x7e,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x40,0x7d] +0x64,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x40,0x7d] +0x02,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x40,0x7d] +0x04,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x40,0x7d] +0x01,0x05,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x41,0x7d] +0x01,0xfd,0x41,0x7d + +# GFX11: v_cmpx_f_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x40,0x7d] +0xfe,0x05,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x40,0x7d] +0x6a,0x04,0x40,0x7d + +# GFX11: v_cmpx_f_f64_e64 0.5, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0xf0,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0xf0,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_f64_e64 0, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x80,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0x80,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_f64_e64 -1, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0xc1,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0xc1,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_f64_e64 -4.0, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0xf7,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0xf7,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_f64_e64 exec, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x7e,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0x7e,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x83,0x01,0x00] 
+0x7e,0x00,0xa0,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_f_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa0,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], v[254:255] ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xfd,0x03,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xfd,0x03,0x00 + +# GFX11: v_cmpx_f_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa0,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa0,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_f64_e64 vcc, v[2:3] ; encoding: [0x7e,0x00,0xa0,0xd4,0x6a,0x04,0x02,0x00] +0x7e,0x00,0xa0,0xd4,0x6a,0x04,0x02,0x00 + +# GFX11: v_cmpx_f_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x80,0x7d] +0xf0,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 0, v2 ; encoding: [0x80,0x04,0x80,0x7d] +0x80,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x80,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x80,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x80,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x80,0x7d] +0xc1,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x80,0x7d] +0xf7,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x80,0x7d] +0x7f,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x80,0x7d] +0x7e,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x80,0x7d] +0x7d,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x80,0x7d] +0x65,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x80,0x7d] +0x01,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x81,0x7d] +0x01,0xff,0x81,0x7d + +# GFX11: v_cmpx_f_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x80,0x7d] +0x01,0x05,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x80,0x7d] +0xff,0x05,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x80,0x7d] +0x6b,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x80,0x7d] +0x6a,0x04,0x80,0x7d + +# GFX11: v_cmpx_f_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: 
v_cmpx_f_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_f_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc0,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc0,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa0,0x7d] +0xf0,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xa0,0x7d] +0x80,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa0,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa0,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa0,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa0,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xa0,0x7d] +0xc1,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa0,0x7d] +0xf7,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xa0,0x7d] +0x7e,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa0,0x7d] +0x64,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa0,0x7d] +0x02,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa0,0x7d] +0x04,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa0,0x7d] +0x01,0x05,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa1,0x7d] +0x01,0xfd,0xa1,0x7d + +# GFX11: v_cmpx_f_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa0,0x7d] +0xfe,0x05,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xa0,0x7d] +0x6a,0x04,0xa0,0x7d + +# GFX11: v_cmpx_f_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], s[6:7] ; encoding: 
[0x7e,0x00,0xd0,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_f_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd0,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd0,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x90,0x7d] +0xf0,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 0, v2 ; encoding: [0x80,0x04,0x90,0x7d] +0x80,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x90,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x90,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x90,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x90,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x90,0x7d] +0xc1,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x90,0x7d] +0xf7,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x90,0x7d] +0x7f,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x90,0x7d] +0x7e,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x90,0x7d] +0x7d,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x90,0x7d] +0x65,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x90,0x7d] +0x01,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x91,0x7d] +0x01,0xff,0x91,0x7d + +# GFX11: v_cmpx_f_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x90,0x7d] +0x01,0x05,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x90,0x7d] +0xff,0x05,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x90,0x7d] +0x6b,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x90,0x7d] +0x6a,0x04,0x90,0x7d + +# GFX11: v_cmpx_f_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_f_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc8,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc8,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_f_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb0,0x7d] +0xf0,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xb0,0x7d] +0x80,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7d,0x73,0x72,0x71,0x3f] 
+0xff,0x04,0xb0,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_f_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb0,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb0,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_f_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xb0,0x7d] +0xc1,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb0,0x7d] +0xf7,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xb0,0x7d] +0x7e,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb0,0x7d] +0x64,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb0,0x7d] +0x02,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb0,0x7d] +0x04,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb0,0x7d] +0x01,0x05,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb1,0x7d] +0x01,0xfd,0xb1,0x7d + +# GFX11: v_cmpx_f_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb0,0x7d] +0xfe,0x05,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xb0,0x7d] +0x6a,0x04,0xb0,0x7d + +# GFX11: v_cmpx_f_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_f_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd8,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd8,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e32 0, v2 ; encoding: [0x80,0x04,0x0c,0x7d] +0x80,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x0c,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x0c,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x0c,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0c,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x0c,0x7d] +0xc1,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x0c,0x7d] +0x7f,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x0c,0x7d] +0x7e,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x0c,0x7d] +0x7d,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x0c,0x7d] +0x65,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x0c,0x7d] +0x01,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x0c,0x7d] +0x01,0x05,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x0c,0x7d] +0x6b,0x04,0x0c,0x7d + +# GFX11: 
v_cmpx_ge_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x0c,0x7d] +0x6a,0x04,0x0c,0x7d + +# GFX11: v_cmpx_ge_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x86,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x86,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ge_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ge_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ge_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x86,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x2c,0x7d] +0xf0,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 0, v2 ; encoding: [0x80,0x04,0x2c,0x7d] +0x80,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x2c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x2c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x2c,0x7d] +0xc1,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x2c,0x7d] +0xf7,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x2c,0x7d] +0x7f,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x2c,0x7d] +0x7e,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x2c,0x7d] +0x7d,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x2c,0x7d] +0x65,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x2c,0x7d] +0x01,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x2d,0x7d] +0x01,0xff,0x2d,0x7d + +# GFX11: v_cmpx_ge_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x2c,0x7d] +0x01,0x05,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x2c,0x7d] +0xff,0x05,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x2c,0x7d] +0x6b,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x2c,0x7d] +0x6a,0x04,0x2c,0x7d + +# GFX11: v_cmpx_ge_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x96,0xd4,0x01,0x01,0x01,0x00 + +# 
GFX11: v_cmpx_ge_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x96,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ge_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ge_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ge_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x96,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4c,0x7d] +0xf0,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x4c,0x7d] +0x80,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x4c,0x7d] +0xc1,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4c,0x7d] +0xf7,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x4c,0x7d] +0x7e,0x02,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x4c,0x7d] +0x7e,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4c,0x7d] +0x64,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4c,0x7d] +0x02,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4c,0x7d] +0x04,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4c,0x7d] +0x01,0x05,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4d,0x7d] +0x01,0xfd,0x4d,0x7d + +# GFX11: v_cmpx_ge_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4c,0x7d] +0xfe,0x05,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x4c,0x7d] +0x6a,0x04,0x4c,0x7d + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x83,0x01,0x00] 
+0x7e,0x00,0xa6,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ge_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ge_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ge_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa6,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e32 0, v2 ; encoding: [0x80,0x04,0x6c,0x7d] +0x80,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x6c,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x6c,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x6c,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x6c,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x6c,0x7d] +0xc1,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x6c,0x7d] +0x7f,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x6c,0x7d] +0x7e,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x6c,0x7d] +0x7d,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x6c,0x7d] +0x65,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x6c,0x7d] +0x01,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x6c,0x7d] +0x01,0x05,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x6c,0x7d] +0x6b,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x6c,0x7d] +0x6a,0x04,0x6c,0x7d + +# GFX11: v_cmpx_ge_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xd7,0x00,0x00] 
+0x7e,0x00,0xb6,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb6,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x8c,0x7d] +0xf0,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 0, v2 ; encoding: [0x80,0x04,0x8c,0x7d] +0x80,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x8c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x8c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x8c,0x7d] +0xc1,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x8c,0x7d] +0xf7,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x8c,0x7d] +0x7f,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x8c,0x7d] +0x7e,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x8c,0x7d] +0x7d,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x8c,0x7d] +0x65,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x8c,0x7d] +0x01,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x8d,0x7d] +0x01,0xff,0x8d,0x7d + +# GFX11: v_cmpx_ge_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x8c,0x7d] +0x01,0x05,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x8c,0x7d] +0xff,0x05,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x8c,0x7d] +0x6b,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x8c,0x7d] +0x6a,0x04,0x8c,0x7d + +# GFX11: v_cmpx_ge_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc6,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xac,0x7d] +0xf0,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xac,0x7d] +0x80,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xac,0x7d,0x73,0x72,0x71,0x3f] 
+0xff,0x04,0xac,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xac,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xac,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xac,0x7d] +0xc1,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xac,0x7d] +0xf7,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xac,0x7d] +0x7e,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xac,0x7d] +0x64,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xac,0x7d] +0x02,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xac,0x7d] +0x04,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xac,0x7d] +0x01,0x05,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xad,0x7d] +0x01,0xfd,0xad,0x7d + +# GFX11: v_cmpx_ge_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xac,0x7d] +0xfe,0x05,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xac,0x7d] +0x6a,0x04,0xac,0x7d + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ge_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd6,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e32 0, v2 ; encoding: [0x80,0x04,0x7c,0x7d] +0x80,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x7c,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x7c,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x7c,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x7c,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x7c,0x7d] +0xc1,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x7c,0x7d] +0x7f,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x7c,0x7d] +0x7e,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x7c,0x7d] +0x7d,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x7c,0x7d] +0x65,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x7c,0x7d] +0x01,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x7c,0x7d] +0x01,0x05,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x7c,0x7d] +0x6b,0x04,0x7c,0x7d + +# 
GFX11: v_cmpx_ge_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x7c,0x7d] +0x6a,0x04,0x7c,0x7d + +# GFX11: v_cmpx_ge_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_u16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xbe,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x9c,0x7d] +0xf0,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 0, v2 ; encoding: [0x80,0x04,0x9c,0x7d] +0x80,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x9c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x9c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x9c,0x7d] +0xc1,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x9c,0x7d] +0xf7,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x9c,0x7d] +0x7f,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x9c,0x7d] +0x7e,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x9c,0x7d] +0x7d,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x9c,0x7d] +0x65,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x9c,0x7d] +0x01,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x9d,0x7d] +0x01,0xff,0x9d,0x7d + +# GFX11: v_cmpx_ge_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x9c,0x7d] +0x01,0x05,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x9c,0x7d] +0xff,0x05,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x9c,0x7d] +0x6b,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x9c,0x7d] +0x6a,0x04,0x9c,0x7d + +# GFX11: v_cmpx_ge_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xce,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xce,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xff,0x00,0x00] 
+0x7e,0x00,0xce,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ge_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xce,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ge_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbc,0x7d] +0xf0,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xbc,0x7d] +0x80,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xbc,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ge_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbc,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xbc,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ge_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xbc,0x7d] +0xc1,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbc,0x7d] +0xf7,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xbc,0x7d] +0x7e,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbc,0x7d] +0x64,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbc,0x7d] +0x02,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbc,0x7d] +0x04,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbc,0x7d] +0x01,0x05,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbd,0x7d] +0x01,0xfd,0xbd,0x7d + +# GFX11: v_cmpx_ge_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbc,0x7d] +0xfe,0x05,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xbc,0x7d] +0x6a,0x04,0xbc,0x7d + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xde,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xde,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xde,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xde,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xde,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xde,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xde,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xde,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ge_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0xd5,0x00,0x00] 
+0x7e,0x00,0xde,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e32 0, v2 ; encoding: [0x80,0x04,0x08,0x7d] +0x80,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x08,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x08,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x08,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x08,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x08,0x7d] +0xc1,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x08,0x7d] +0x7f,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x08,0x7d] +0x7e,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x08,0x7d] +0x7d,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x08,0x7d] +0x65,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x08,0x7d] +0x01,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x08,0x7d] +0x01,0x05,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x08,0x7d] +0x6b,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x08,0x7d] +0x6a,0x04,0x08,0x7d + +# GFX11: v_cmpx_gt_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x84,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x84,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_gt_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_gt_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_gt_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x84,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x28,0x7d] +0xf0,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 0, v2 ; encoding: [0x80,0x04,0x28,0x7d] +0x80,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x28,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x28,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x28,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x28,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x28,0x7d] +0xc1,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x28,0x7d] +0xf7,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 exec_hi, v2 ; 
encoding: [0x7f,0x04,0x28,0x7d] +0x7f,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x28,0x7d] +0x7e,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x28,0x7d] +0x7d,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x28,0x7d] +0x65,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x28,0x7d] +0x01,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x29,0x7d] +0x01,0xff,0x29,0x7d + +# GFX11: v_cmpx_gt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x28,0x7d] +0x01,0x05,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x28,0x7d] +0xff,0x05,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x28,0x7d] +0x6b,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x28,0x7d] +0x6a,0x04,0x28,0x7d + +# GFX11: v_cmpx_gt_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x94,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x94,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_gt_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_gt_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_gt_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x94,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x48,0x7d] +0xf0,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x48,0x7d] +0x80,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x48,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x48,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x48,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x48,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x48,0x7d] +0xc1,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x48,0x7d] +0xf7,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x48,0x7d] +0x7e,0x02,0x48,0x7d + +# 
GFX11: v_cmpx_gt_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x48,0x7d] +0x7e,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x48,0x7d] +0x64,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x48,0x7d] +0x02,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x48,0x7d] +0x04,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x48,0x7d] +0x01,0x05,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x49,0x7d] +0x01,0xfd,0x49,0x7d + +# GFX11: v_cmpx_gt_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x48,0x7d] +0xfe,0x05,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x48,0x7d] +0x6a,0x04,0x48,0x7d + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_gt_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_gt_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_gt_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa4,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e32 0, v2 ; encoding: [0x80,0x04,0x68,0x7d] +0x80,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x68,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x68,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x68,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x68,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x68,0x7d] +0xc1,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x68,0x7d] +0x7f,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x68,0x7d] +0x7e,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x68,0x7d] +0x7d,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x68,0x7d] +0x65,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x68,0x7d] +0x01,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x68,0x7d] +0x01,0x05,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 vcc_hi, v2 ; encoding: 
[0x6b,0x04,0x68,0x7d] +0x6b,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x68,0x7d] +0x6a,0x04,0x68,0x7d + +# GFX11: v_cmpx_gt_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb4,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x88,0x7d] +0xf0,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 0, v2 ; encoding: [0x80,0x04,0x88,0x7d] +0x80,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x88,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x88,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x88,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x88,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x88,0x7d] +0xc1,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x88,0x7d] +0xf7,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x88,0x7d] +0x7f,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x88,0x7d] +0x7e,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x88,0x7d] +0x7d,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x88,0x7d] +0x65,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x88,0x7d] +0x01,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x89,0x7d] +0x01,0xff,0x89,0x7d + +# GFX11: v_cmpx_gt_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x88,0x7d] +0x01,0x05,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x88,0x7d] +0xff,0x05,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x88,0x7d] +0x6b,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x88,0x7d] +0x6a,0x04,0x88,0x7d + +# GFX11: v_cmpx_gt_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, exec_hi ; encoding: 
[0x7e,0x00,0xc4,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc4,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa8,0x7d] +0xf0,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xa8,0x7d] +0x80,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa8,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa8,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa8,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xa8,0x7d] +0xc1,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa8,0x7d] +0xf7,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xa8,0x7d] +0x7e,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa8,0x7d] +0x64,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa8,0x7d] +0x02,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa8,0x7d] +0x04,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa8,0x7d] +0x01,0x05,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa9,0x7d] +0x01,0xfd,0xa9,0x7d + +# GFX11: v_cmpx_gt_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa8,0x7d] +0xfe,0x05,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xa8,0x7d] +0x6a,0x04,0xa8,0x7d + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_gt_i64_e64 v[1:2], vcc ; encoding: 
[0x7e,0x00,0xd4,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd4,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e32 0, v2 ; encoding: [0x80,0x04,0x78,0x7d] +0x80,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x78,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x78,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x78,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x78,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x78,0x7d] +0xc1,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x78,0x7d] +0x7f,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x78,0x7d] +0x7e,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x78,0x7d] +0x7d,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x78,0x7d] +0x65,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x78,0x7d] +0x01,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x78,0x7d] +0x01,0x05,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x78,0x7d] +0x6b,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x78,0x7d] +0x6a,0x04,0x78,0x7d + +# GFX11: v_cmpx_gt_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_u16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xbc,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x98,0x7d] +0xf0,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 0, v2 ; encoding: [0x80,0x04,0x98,0x7d] +0x80,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x98,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x98,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x98,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x98,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x98,0x7d] +0xc1,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x98,0x7d] +0xf7,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x98,0x7d] +0x7f,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x98,0x7d] +0x7e,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x98,0x7d] +0x7d,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x98,0x7d] +0x65,0x04,0x98,0x7d + +# GFX11: 
v_cmpx_gt_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x98,0x7d] +0x01,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x99,0x7d] +0x01,0xff,0x99,0x7d + +# GFX11: v_cmpx_gt_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x98,0x7d] +0x01,0x05,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x98,0x7d] +0xff,0x05,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x98,0x7d] +0x6b,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x98,0x7d] +0x6a,0x04,0x98,0x7d + +# GFX11: v_cmpx_gt_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_gt_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xcc,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_gt_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb8,0x7d] +0xf0,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xb8,0x7d] +0x80,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb8,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_gt_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb8,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb8,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_gt_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xb8,0x7d] +0xc1,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb8,0x7d] +0xf7,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xb8,0x7d] +0x7e,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb8,0x7d] +0x64,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb8,0x7d] +0x02,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb8,0x7d] +0x04,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb8,0x7d] +0x01,0x05,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb9,0x7d] +0x01,0xfd,0xb9,0x7d + +# GFX11: v_cmpx_gt_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb8,0x7d] +0xfe,0x05,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xb8,0x7d] +0x6a,0x04,0xb8,0x7d + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], 0.5 ; 
encoding: [0x7e,0x00,0xdc,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_gt_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xdc,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e32 0, v2 ; encoding: [0x80,0x04,0x06,0x7d] +0x80,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x06,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x06,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x06,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x06,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x06,0x7d] +0xc1,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x06,0x7d] +0x7f,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x06,0x7d] +0x7e,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x06,0x7d] +0x7d,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x06,0x7d] +0x65,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x06,0x7d] +0x01,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x06,0x7d] +0x01,0x05,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x06,0x7d] +0x6b,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x06,0x7d] +0x6a,0x04,0x06,0x7d + +# GFX11: v_cmpx_le_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x83,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x83,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_le_f16_e64 v1, -v2 ; encoding: 
[0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_le_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_le_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x83,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x26,0x7d] +0xf0,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 0, v2 ; encoding: [0x80,0x04,0x26,0x7d] +0x80,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x26,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x26,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x26,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x26,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x26,0x7d] +0xc1,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x26,0x7d] +0xf7,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x26,0x7d] +0x7f,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x26,0x7d] +0x7e,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x26,0x7d] +0x7d,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x26,0x7d] +0x65,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x26,0x7d] +0x01,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x27,0x7d] +0x01,0xff,0x27,0x7d + +# GFX11: v_cmpx_le_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x26,0x7d] +0x01,0x05,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x26,0x7d] +0xff,0x05,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x26,0x7d] +0x6b,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x26,0x7d] +0x6a,0x04,0x26,0x7d + +# GFX11: v_cmpx_le_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x93,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x93,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_le_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x40 + 
+# GFX11: v_cmpx_le_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_le_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x93,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x46,0x7d] +0xf0,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x46,0x7d] +0x80,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x46,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x46,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x46,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x46,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x46,0x7d] +0xc1,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x46,0x7d] +0xf7,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x46,0x7d] +0x7e,0x02,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x46,0x7d] +0x7e,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x46,0x7d] +0x64,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x46,0x7d] +0x02,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x46,0x7d] +0x04,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x46,0x7d] +0x01,0x05,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x47,0x7d] +0x01,0xfd,0x47,0x7d + +# GFX11: v_cmpx_le_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x46,0x7d] +0xfe,0x05,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x46,0x7d] +0x6a,0x04,0x46,0x7d + +# GFX11: v_cmpx_le_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_le_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_le_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_le_f64_e64 v[1:2], 
vcc ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa3,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e32 0, v2 ; encoding: [0x80,0x04,0x66,0x7d] +0x80,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x66,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x66,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x66,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x66,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x66,0x7d] +0xc1,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x66,0x7d] +0x7f,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x66,0x7d] +0x7e,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x66,0x7d] +0x7d,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x66,0x7d] +0x65,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x66,0x7d] +0x01,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x66,0x7d] +0x01,0x05,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x66,0x7d] +0x6b,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x66,0x7d] +0x6a,0x04,0x66,0x7d + +# GFX11: v_cmpx_le_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb3,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x86,0x7d] +0xf0,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 0, v2 ; encoding: [0x80,0x04,0x86,0x7d] +0x80,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x86,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x86,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x86,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x86,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x86,0x7d] +0xc1,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x86,0x7d] +0xf7,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x86,0x7d] +0x7f,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x86,0x7d] +0x7e,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x86,0x7d] +0x7d,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x86,0x7d] +0x65,0x04,0x86,0x7d + +# 
GFX11: v_cmpx_le_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x86,0x7d] +0x01,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x87,0x7d] +0x01,0xff,0x87,0x7d + +# GFX11: v_cmpx_le_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x86,0x7d] +0x01,0x05,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x86,0x7d] +0xff,0x05,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x86,0x7d] +0x6b,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x86,0x7d] +0x6a,0x04,0x86,0x7d + +# GFX11: v_cmpx_le_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc3,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa6,0x7d] +0xf0,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xa6,0x7d] +0x80,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa6,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa6,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa6,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xa6,0x7d] +0xc1,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa6,0x7d] +0xf7,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xa6,0x7d] +0x7e,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa6,0x7d] +0x64,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa6,0x7d] +0x02,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa6,0x7d] +0x04,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa6,0x7d] +0x01,0x05,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa7,0x7d] +0x01,0xfd,0xa7,0x7d + +# GFX11: v_cmpx_le_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa6,0x7d] +0xfe,0x05,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xa6,0x7d] +0x6a,0x04,0xa6,0x7d + +# GFX11: v_cmpx_le_i64_e64 v[1:2], 
0.5 ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_le_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd3,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e32 0, v2 ; encoding: [0x80,0x04,0x76,0x7d] +0x80,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x76,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x76,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x76,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x76,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x76,0x7d] +0xc1,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x76,0x7d] +0x7f,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x76,0x7d] +0x7e,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x76,0x7d] +0x7d,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x76,0x7d] +0x65,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x76,0x7d] +0x01,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x76,0x7d] +0x01,0x05,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x76,0x7d] +0x6b,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x76,0x7d] +0x6a,0x04,0x76,0x7d + +# GFX11: v_cmpx_le_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_u16_e64 v1, vcc_lo ; encoding: 
[0x7e,0x00,0xbb,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xbb,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x96,0x7d] +0xf0,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 0, v2 ; encoding: [0x80,0x04,0x96,0x7d] +0x80,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x96,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x96,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x96,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x96,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x96,0x7d] +0xc1,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x96,0x7d] +0xf7,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x96,0x7d] +0x7f,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x96,0x7d] +0x7e,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x96,0x7d] +0x7d,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x96,0x7d] +0x65,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x96,0x7d] +0x01,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x97,0x7d] +0x01,0xff,0x97,0x7d + +# GFX11: v_cmpx_le_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x96,0x7d] +0x01,0x05,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x96,0x7d] +0xff,0x05,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x96,0x7d] +0x6b,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x96,0x7d] +0x6a,0x04,0x96,0x7d + +# GFX11: v_cmpx_le_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_le_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xcb,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_le_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb6,0x7d] +0xf0,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xb6,0x7d] +0x80,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb6,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb6,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_le_u64_e32 0xaf123456, v[2:3] ; encoding: 
[0xff,0x04,0xb6,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb6,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_le_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xb6,0x7d] +0xc1,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb6,0x7d] +0xf7,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xb6,0x7d] +0x7e,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb6,0x7d] +0x64,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb6,0x7d] +0x02,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb6,0x7d] +0x04,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb6,0x7d] +0x01,0x05,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb7,0x7d] +0x01,0xfd,0xb7,0x7d + +# GFX11: v_cmpx_le_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb6,0x7d] +0xfe,0x05,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xb6,0x7d] +0x6a,0x04,0xb6,0x7d + +# GFX11: v_cmpx_le_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_le_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xdb,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e32 0, v2 ; encoding: [0x80,0x04,0x0a,0x7d] +0x80,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x0a,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x0a,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x0a,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0a,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x0a,0x7d] +0xc1,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x7d] +0x7f,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x7d] +0x7e,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x0a,0x7d] +0x7d,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x0a,0x7d] +0x65,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x0a,0x7d] +0x01,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x0a,0x7d] +0x01,0x05,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x7d] +0x6b,0x04,0x0a,0x7d + +# GFX11: v_cmpx_lg_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x7d] +0x6a,0x04,0x0a,0x7d + +# GFX11: 
v_cmpx_lg_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x85,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x85,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lg_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_lg_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lg_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lg_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x85,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x2a,0x7d] +0xf0,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 0, v2 ; encoding: [0x80,0x04,0x2a,0x7d] +0x80,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x2a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lg_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x2a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lg_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x2a,0x7d] +0xc1,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x2a,0x7d] +0xf7,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x2a,0x7d] +0x7f,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x2a,0x7d] +0x7e,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x2a,0x7d] +0x7d,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x2a,0x7d] +0x65,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x2a,0x7d] +0x01,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x2b,0x7d] +0x01,0xff,0x2b,0x7d + +# GFX11: v_cmpx_lg_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x2a,0x7d] +0x01,0x05,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x2a,0x7d] +0xff,0x05,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x2a,0x7d] +0x6b,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x2a,0x7d] +0x6a,0x04,0x2a,0x7d + +# GFX11: v_cmpx_lg_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x95,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x83,0x01,0x00] 
+0x7e,0x00,0x95,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lg_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_lg_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lg_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lg_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x95,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lg_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4a,0x7d] +0xf0,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x4a,0x7d] +0x80,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lg_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lg_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x4a,0x7d] +0xc1,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4a,0x7d] +0xf7,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x4a,0x7d] +0x7e,0x02,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x4a,0x7d] +0x7e,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4a,0x7d] +0x64,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4a,0x7d] +0x02,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4a,0x7d] +0x04,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4a,0x7d] +0x01,0x05,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4b,0x7d] +0x01,0xfd,0x4b,0x7d + +# GFX11: v_cmpx_lg_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4a,0x7d] +0xfe,0x05,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x4a,0x7d] +0x6a,0x04,0x4a,0x7d + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], -4.0 ; 
encoding: [0x7e,0x00,0xa5,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_lg_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_lg_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lg_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa5,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e32 0, v2 ; encoding: [0x80,0x04,0x02,0x7d] +0x80,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x02,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x02,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x02,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x02,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] +0xc1,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] +0x7f,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] +0x7e,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] +0x7d,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x02,0x7d] +0x65,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] +0x01,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] +0x01,0x05,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] +0x6b,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] +0x6a,0x04,0x02,0x7d + +# GFX11: v_cmpx_lt_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x81,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x81,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lt_f16_e64 v1, -v2 ; encoding: 
[0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_lt_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lt_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x81,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x22,0x7d] +0xf0,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 0, v2 ; encoding: [0x80,0x04,0x22,0x7d] +0x80,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x22,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x22,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x22,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x22,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x22,0x7d] +0xc1,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x22,0x7d] +0xf7,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x22,0x7d] +0x7f,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x22,0x7d] +0x7e,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x22,0x7d] +0x7d,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x22,0x7d] +0x65,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x22,0x7d] +0x01,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x23,0x7d] +0x01,0xff,0x23,0x7d + +# GFX11: v_cmpx_lt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x22,0x7d] +0x01,0x05,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x22,0x7d] +0xff,0x05,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x22,0x7d] +0x6b,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x22,0x7d] +0x6a,0x04,0x22,0x7d + +# GFX11: v_cmpx_lt_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x91,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x91,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lt_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x40 + 
+# GFX11: v_cmpx_lt_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lt_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x91,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x42,0x7d] +0xf0,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x42,0x7d] +0x80,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x42,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x42,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x42,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x42,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x42,0x7d] +0xc1,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x42,0x7d] +0xf7,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x42,0x7d] +0x7e,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x42,0x7d] +0x64,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x42,0x7d] +0x02,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x42,0x7d] +0x04,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x42,0x7d] +0x01,0x05,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x43,0x7d] +0x01,0xfd,0x43,0x7d + +# GFX11: v_cmpx_lt_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x42,0x7d] +0xfe,0x05,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x42,0x7d] +0x6a,0x04,0x42,0x7d + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_lt_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_lt_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_lt_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa1,0xd4,0x01,0xd5,0x00,0x00 
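+
+# Aside (illustration only, not part of the patch): the `_e64` checks above pair a VOP3
+# encoding's two little-endian dwords with the printed operands. A minimal sketch of how
+# the source fields fall out of those bytes, assuming the usual GFX11 VOP3 layout
+# (src0 at bits [40:32], src1 at [49:41], neg at [63:61]); the field positions and the
+# small operand-name table are assumptions for illustration, not taken from this patch.
+#
+#   OPERAND_NAMES = {0x6A: "vcc", 0x7E: "exec", 0x80: "0", 0xC1: "-1", 0xF0: "0.5", 0xF7: "-4.0"}
+#
+#   def decode_vop3_srcs(encoding_bytes):
+#       """Return (src0, src1, neg) fields of an 8-byte little-endian VOP3 encoding."""
+#       word = int.from_bytes(bytes(encoding_bytes), "little")
+#       src0 = (word >> 32) & 0x1FF
+#       src1 = (word >> 41) & 0x1FF
+#       neg = (word >> 61) & 0x7
+#       return src0, src1, neg
+#
+#   def name(op):
+#       # VGPRs occupy 0x100..0x1FF; low values are SGPRs; a few inline constants are tabulated.
+#       if 0x100 <= op <= 0x1FF:
+#           return f"v{op - 0x100}"
+#       if op <= 0x65:
+#           return f"s{op}"
+#       return OPERAND_NAMES.get(op, hex(op))
+#
+#   # "v_cmpx_lt_f64_e64 v[1:2], vcc" from the checks above (only base registers are printed).
+#   src0, src1, neg = decode_vop3_srcs([0x7E, 0x00, 0xA1, 0xD4, 0x01, 0xD5, 0x00, 0x00])
+#   print(name(src0), name(src1), bin(neg))   # v1 vcc 0b0
+#
+#   # "v_cmpx_lg_f64_e64 -v[1:2], v[2:3]": the trailing 0x20 byte sets the src0 negate bit.
+#   src0, src1, neg = decode_vop3_srcs([0x7E, 0x00, 0xA5, 0xD4, 0x01, 0x05, 0x02, 0x20])
+#   print(name(src0), name(src1), bin(neg))   # v1 v2 0b1
+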
+ +# GFX11: v_cmpx_lt_i16_e32 0, v2 ; encoding: [0x80,0x04,0x62,0x7d] +0x80,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x62,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x62,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x62,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x62,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x62,0x7d] +0xc1,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x62,0x7d] +0x7f,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x62,0x7d] +0x7e,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x62,0x7d] +0x7d,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x62,0x7d] +0x65,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x62,0x7d] +0x01,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x62,0x7d] +0x01,0x05,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x62,0x7d] +0x6b,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x62,0x7d] +0x6a,0x04,0x62,0x7d + +# GFX11: v_cmpx_lt_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb1,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x82,0x7d] +0xf0,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 0, v2 ; encoding: [0x80,0x04,0x82,0x7d] +0x80,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x82,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x82,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x82,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x82,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x82,0x7d] +0xc1,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x82,0x7d] +0xf7,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x82,0x7d] +0x7f,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x82,0x7d] +0x7e,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x82,0x7d] +0x7d,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x82,0x7d] +0x65,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x82,0x7d] +0x01,0x04,0x82,0x7d + +# GFX11: 
v_cmpx_lt_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x83,0x7d] +0x01,0xff,0x83,0x7d + +# GFX11: v_cmpx_lt_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x82,0x7d] +0x01,0x05,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x82,0x7d] +0xff,0x05,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x82,0x7d] +0x6b,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x82,0x7d] +0x6a,0x04,0x82,0x7d + +# GFX11: v_cmpx_lt_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc1,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xa2,0x7d] +0xf0,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xa2,0x7d] +0x80,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xa2,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xa2,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa2,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xa2,0x7d] +0xc1,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xa2,0x7d] +0xf7,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xa2,0x7d] +0x7e,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xa2,0x7d] +0x64,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xa2,0x7d] +0x02,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xa2,0x7d] +0x04,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xa2,0x7d] +0x01,0x05,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xa3,0x7d] +0x01,0xfd,0xa3,0x7d + +# GFX11: v_cmpx_lt_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xa2,0x7d] +0xfe,0x05,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xa2,0x7d] +0x6a,0x04,0xa2,0x7d + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0xe1,0x01,0x00 + 
+# GFX11: v_cmpx_lt_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_lt_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd1,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e32 0, v2 ; encoding: [0x80,0x04,0x72,0x7d] +0x80,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x72,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x72,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x72,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x72,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x72,0x7d] +0xc1,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x72,0x7d] +0x7f,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x72,0x7d] +0x7e,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x72,0x7d] +0x7d,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x72,0x7d] +0x65,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x72,0x7d] +0x01,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x72,0x7d] +0x01,0x05,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x72,0x7d] +0x6b,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x72,0x7d] +0x6a,0x04,0x72,0x7d + +# GFX11: v_cmpx_lt_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_u16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb9,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e32 
0.5, v2 ; encoding: [0xf0,0x04,0x92,0x7d] +0xf0,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 0, v2 ; encoding: [0x80,0x04,0x92,0x7d] +0x80,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x92,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x92,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x92,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x92,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x92,0x7d] +0xc1,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x92,0x7d] +0xf7,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x92,0x7d] +0x7f,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x92,0x7d] +0x7e,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x92,0x7d] +0x7d,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x92,0x7d] +0x65,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x92,0x7d] +0x01,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x93,0x7d] +0x01,0xff,0x93,0x7d + +# GFX11: v_cmpx_lt_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x92,0x7d] +0x01,0x05,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x92,0x7d] +0xff,0x05,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x92,0x7d] +0x6b,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x92,0x7d] +0x6a,0x04,0x92,0x7d + +# GFX11: v_cmpx_lt_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_lt_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc9,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_lt_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xb2,0x7d] +0xf0,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xb2,0x7d] +0x80,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xb2,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_lt_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xb2,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb2,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_lt_u64_e32 -1, 
v[2:3] ; encoding: [0xc1,0x04,0xb2,0x7d] +0xc1,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xb2,0x7d] +0xf7,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xb2,0x7d] +0x7e,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xb2,0x7d] +0x64,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xb2,0x7d] +0x02,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xb2,0x7d] +0x04,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xb2,0x7d] +0x01,0x05,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xb3,0x7d] +0x01,0xfd,0xb3,0x7d + +# GFX11: v_cmpx_lt_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xb2,0x7d] +0xfe,0x05,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xb2,0x7d] +0x6a,0x04,0xb2,0x7d + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_lt_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd9,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e32 0, v2 ; encoding: [0x80,0x04,0x6a,0x7d] +0x80,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x6a,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x6a,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x6a,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x6a,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e32 -1, v2 ; encoding: [0xc1,0x04,0x6a,0x7d] +0xc1,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x6a,0x7d] +0x7f,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x6a,0x7d] +0x7e,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 m0, v2 ; encoding: [0x7d,0x04,0x6a,0x7d] +0x7d,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 s101, v2 ; encoding: [0x65,0x04,0x6a,0x7d] +0x65,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 s1, v2 ; encoding: [0x01,0x04,0x6a,0x7d] +0x01,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 v1, v2 ; encoding: [0x01,0x05,0x6a,0x7d] +0x01,0x05,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x6a,0x7d] +0x6b,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x6a,0x7d] +0x6a,0x04,0x6a,0x7d + +# GFX11: v_cmpx_ne_i16_e64 v1, 0 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0x01,0x01,0x00 
+ +# GFX11: v_cmpx_ne_i16_e64 v1, -1 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, m0 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, s101 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, s2 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ne_i16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xb5,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x8a,0x7d] +0xf0,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 0, v2 ; encoding: [0x80,0x04,0x8a,0x7d] +0x80,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x8a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ne_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x8a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ne_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x8a,0x7d] +0xc1,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x8a,0x7d] +0xf7,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x8a,0x7d] +0x7f,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x8a,0x7d] +0x7e,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x8a,0x7d] +0x7d,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x8a,0x7d] +0x65,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x8a,0x7d] +0x01,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x8b,0x7d] +0x01,0xff,0x8b,0x7d + +# GFX11: v_cmpx_ne_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x8a,0x7d] +0x01,0x05,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x8a,0x7d] +0xff,0x05,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x8a,0x7d] +0x6b,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x8a,0x7d] +0x6a,0x04,0x8a,0x7d + +# GFX11: v_cmpx_ne_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, m0 ; encoding: 
[0x7e,0x00,0xc5,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ne_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc5,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xaa,0x7d] +0xf0,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xaa,0x7d] +0x80,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xaa,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ne_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xaa,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xaa,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ne_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xaa,0x7d] +0xc1,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xaa,0x7d] +0xf7,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xaa,0x7d] +0x7e,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xaa,0x7d] +0x64,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xaa,0x7d] +0x02,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xaa,0x7d] +0x04,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xaa,0x7d] +0x01,0x05,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xab,0x7d] +0x01,0xfd,0xab,0x7d + +# GFX11: v_cmpx_ne_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xaa,0x7d] +0xfe,0x05,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xaa,0x7d] +0x6a,0x04,0xaa,0x7d + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ne_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd5,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e32 0, v2 ; encoding: [0x80,0x04,0x1a,0x7d] +0x80,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x1a,0x7d,0x56,0x34,0x00,0x00] 
+0xff,0x04,0x1a,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x1a,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1a,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x1a,0x7d] +0xc1,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x1a,0x7d] +0x7f,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x1a,0x7d] +0x7e,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x1a,0x7d] +0x7d,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x1a,0x7d] +0x65,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x1a,0x7d] +0x01,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x1a,0x7d] +0x01,0x05,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x1a,0x7d] +0x6b,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x1a,0x7d] +0x6a,0x04,0x1a,0x7d + +# GFX11: v_cmpx_neq_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_neq_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_neq_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_neq_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_neq_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8d,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x3a,0x7d] +0xf0,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 0, v2 ; encoding: [0x80,0x04,0x3a,0x7d] +0x80,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x3a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_neq_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x3a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x3a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_neq_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x3a,0x7d] +0xc1,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x3a,0x7d] +0xf7,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x3a,0x7d] +0x7f,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x3a,0x7d] +0x7e,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 m0, v2 ; 
encoding: [0x7d,0x04,0x3a,0x7d] +0x7d,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x3a,0x7d] +0x65,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x3a,0x7d] +0x01,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x3b,0x7d] +0x01,0xff,0x3b,0x7d + +# GFX11: v_cmpx_neq_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x3a,0x7d] +0x01,0x05,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x3a,0x7d] +0xff,0x05,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x3a,0x7d] +0x6b,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x3a,0x7d] +0x6a,0x04,0x3a,0x7d + +# GFX11: v_cmpx_neq_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_neq_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_neq_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_neq_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_neq_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9d,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_neq_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5a,0x7d] +0xf0,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x5a,0x7d] +0x80,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_neq_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_neq_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x5a,0x7d] +0xc1,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5a,0x7d] +0xf7,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x5a,0x7d] +0x7e,0x02,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x5a,0x7d] +0x7e,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 s[100:101], v[2:3] ; encoding: 
[0x64,0x04,0x5a,0x7d] +0x64,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5a,0x7d] +0x02,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5a,0x7d] +0x04,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5a,0x7d] +0x01,0x05,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5b,0x7d] +0x01,0xfd,0x5b,0x7d + +# GFX11: v_cmpx_neq_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5a,0x7d] +0xfe,0x05,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x5a,0x7d] +0x6a,0x04,0x5a,0x7d + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xad,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xad,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xad,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xad,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xad,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xad,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xad,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xad,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_neq_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_neq_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_neq_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xad,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e32 0, v2 ; encoding: [0x80,0x04,0x7a,0x7d] +0x80,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x7a,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x7a,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x7a,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x7a,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e32 -1, v2 ; encoding: [0xc1,0x04,0x7a,0x7d] +0xc1,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x7a,0x7d] +0x7f,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x7a,0x7d] +0x7e,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 m0, v2 ; encoding: [0x7d,0x04,0x7a,0x7d] +0x7d,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 s101, v2 ; encoding: [0x65,0x04,0x7a,0x7d] +0x65,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 s1, v2 ; encoding: [0x01,0x04,0x7a,0x7d] +0x01,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 v1, v2 ; encoding: [0x01,0x05,0x7a,0x7d] +0x01,0x05,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x7a,0x7d] +0x6b,0x04,0x7a,0x7d + +# GFX11: v_cmpx_ne_u16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x7a,0x7d] +0x6a,0x04,0x7a,0x7d + +# 
GFX11: v_cmpx_ne_u16_e64 v1, 0 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, -1 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, m0 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, s101 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, s2 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ne_u16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xbd,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x9a,0x7d] +0xf0,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 0, v2 ; encoding: [0x80,0x04,0x9a,0x7d] +0x80,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x9a,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9a,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ne_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x9a,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9a,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ne_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x9a,0x7d] +0xc1,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x9a,0x7d] +0xf7,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x9a,0x7d] +0x7f,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x9a,0x7d] +0x7e,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x9a,0x7d] +0x7d,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x9a,0x7d] +0x65,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x9a,0x7d] +0x01,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x9b,0x7d] +0x01,0xff,0x9b,0x7d + +# GFX11: v_cmpx_ne_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x9a,0x7d] +0x01,0x05,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x9a,0x7d] +0xff,0x05,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x9a,0x7d] +0x6b,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x9a,0x7d] +0x6a,0x04,0x9a,0x7d + +# GFX11: v_cmpx_ne_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, 0 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, exec_lo ; encoding: 
[0x7e,0x00,0xcd,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ne_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xcd,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ne_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xba,0x7d] +0xf0,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xba,0x7d] +0x80,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xba,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xba,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ne_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xba,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xba,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ne_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xba,0x7d] +0xc1,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xba,0x7d] +0xf7,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xba,0x7d] +0x7e,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xba,0x7d] +0x64,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xba,0x7d] +0x02,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xba,0x7d] +0x04,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xba,0x7d] +0x01,0x05,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbb,0x7d] +0x01,0xfd,0xbb,0x7d + +# GFX11: v_cmpx_ne_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xba,0x7d] +0xfe,0x05,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xba,0x7d] +0x6a,0x04,0xba,0x7d + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ne_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xdd,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e32 0, v2 ; encoding: [0x80,0x04,0x12,0x7d] 
+0x80,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x12,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x12,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x12,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x12,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x12,0x7d] +0xc1,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x12,0x7d] +0x7f,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x12,0x7d] +0x7e,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x12,0x7d] +0x7d,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x12,0x7d] +0x65,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x12,0x7d] +0x01,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x12,0x7d] +0x01,0x05,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x12,0x7d] +0x6b,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x12,0x7d] +0x6a,0x04,0x12,0x7d + +# GFX11: v_cmpx_nge_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x89,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x89,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nge_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nge_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nge_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nge_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x89,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x32,0x7d] +0xf0,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 0, v2 ; encoding: [0x80,0x04,0x32,0x7d] +0x80,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x32,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x32,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nge_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x32,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x32,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nge_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x32,0x7d] +0xc1,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x32,0x7d] +0xf7,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x32,0x7d] +0x7f,0x04,0x32,0x7d + +# GFX11: 
v_cmpx_nge_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x32,0x7d] +0x7e,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x32,0x7d] +0x7d,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x32,0x7d] +0x65,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x32,0x7d] +0x01,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x33,0x7d] +0x01,0xff,0x33,0x7d + +# GFX11: v_cmpx_nge_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x32,0x7d] +0x01,0x05,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x32,0x7d] +0xff,0x05,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x32,0x7d] +0x6b,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x32,0x7d] +0x6a,0x04,0x32,0x7d + +# GFX11: v_cmpx_nge_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x99,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x99,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nge_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nge_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nge_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nge_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x99,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nge_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x52,0x7d] +0xf0,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x52,0x7d] +0x80,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x52,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x52,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nge_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x52,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x52,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nge_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x52,0x7d] +0xc1,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x52,0x7d] +0xf7,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x52,0x7d] +0x7e,0x02,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 exec, v[2:3] 
; encoding: [0x7e,0x04,0x52,0x7d] +0x7e,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x52,0x7d] +0x64,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x52,0x7d] +0x02,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x52,0x7d] +0x04,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x52,0x7d] +0x01,0x05,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x53,0x7d] +0x01,0xfd,0x53,0x7d + +# GFX11: v_cmpx_nge_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x52,0x7d] +0xfe,0x05,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x52,0x7d] +0x6a,0x04,0x52,0x7d + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_nge_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nge_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nge_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa9,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e32 0, v2 ; encoding: [0x80,0x04,0x16,0x7d] +0x80,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x16,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x16,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x16,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x16,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x16,0x7d] +0xc1,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x16,0x7d] +0x7f,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x16,0x7d] +0x7e,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x16,0x7d] +0x7d,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x16,0x7d] +0x65,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x16,0x7d] +0x01,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x16,0x7d] +0x01,0x05,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 vcc_hi, v2 ; encoding: 
[0x6b,0x04,0x16,0x7d] +0x6b,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x16,0x7d] +0x6a,0x04,0x16,0x7d + +# GFX11: v_cmpx_ngt_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ngt_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ngt_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ngt_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ngt_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8b,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x36,0x7d] +0xf0,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 0, v2 ; encoding: [0x80,0x04,0x36,0x7d] +0x80,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x36,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x36,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ngt_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x36,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x36,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ngt_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x36,0x7d] +0xc1,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x36,0x7d] +0xf7,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x36,0x7d] +0x7f,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x36,0x7d] +0x7e,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x36,0x7d] +0x7d,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x36,0x7d] +0x65,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x36,0x7d] +0x01,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x37,0x7d] +0x01,0xff,0x37,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x36,0x7d] +0x01,0x05,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x36,0x7d] +0xff,0x05,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x36,0x7d] +0x6b,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x36,0x7d] +0x6a,0x04,0x36,0x7d + +# GFX11: v_cmpx_ngt_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, 0 ; encoding: 
[0x7e,0x00,0x9b,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ngt_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ngt_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ngt_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_ngt_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9b,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_ngt_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x56,0x7d] +0xf0,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x56,0x7d] +0x80,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x56,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x56,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_ngt_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x56,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x56,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_ngt_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x56,0x7d] +0xc1,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x56,0x7d] +0xf7,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x56,0x7d] +0x7e,0x02,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x56,0x7d] +0x7e,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x56,0x7d] +0x64,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x56,0x7d] +0x02,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x56,0x7d] +0x04,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x56,0x7d] +0x01,0x05,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x57,0x7d] +0x01,0xfd,0x57,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x56,0x7d] +0xfe,0x05,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x56,0x7d] +0x6a,0x04,0x56,0x7d + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xab,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x01,0x01,0x00] 
+0x7e,0x00,0xab,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xab,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xab,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xab,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xab,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xab,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xab,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_ngt_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_ngt_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_ngt_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xab,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e32 0, v2 ; encoding: [0x80,0x04,0x18,0x7d] +0x80,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x18,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x18,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x18,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x18,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x18,0x7d] +0xc1,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x18,0x7d] +0x7f,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x18,0x7d] +0x7e,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x18,0x7d] +0x7d,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x18,0x7d] +0x65,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x18,0x7d] +0x01,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x18,0x7d] +0x01,0x05,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x18,0x7d] +0x6b,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x18,0x7d] +0x6a,0x04,0x18,0x7d + +# GFX11: v_cmpx_nle_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, s2 ; encoding: 
[0x7e,0x00,0x8c,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nle_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nle_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nle_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nle_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8c,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x38,0x7d] +0xf0,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 0, v2 ; encoding: [0x80,0x04,0x38,0x7d] +0x80,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x38,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x38,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nle_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x38,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x38,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nle_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x38,0x7d] +0xc1,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x38,0x7d] +0xf7,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x38,0x7d] +0x7f,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x38,0x7d] +0x7e,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x38,0x7d] +0x7d,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x38,0x7d] +0x65,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x38,0x7d] +0x01,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x39,0x7d] +0x01,0xff,0x39,0x7d + +# GFX11: v_cmpx_nle_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x38,0x7d] +0x01,0x05,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x38,0x7d] +0xff,0x05,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x38,0x7d] +0x6b,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x38,0x7d] +0x6a,0x04,0x38,0x7d + +# GFX11: v_cmpx_nle_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x00,0x00] 
+0x7e,0x00,0x9c,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nle_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nle_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nle_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nle_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9c,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nle_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x58,0x7d] +0xf0,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x58,0x7d] +0x80,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x58,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x58,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nle_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x58,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x58,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nle_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x58,0x7d] +0xc1,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x58,0x7d] +0xf7,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x58,0x7d] +0x7e,0x02,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x58,0x7d] +0x7e,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x58,0x7d] +0x64,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x58,0x7d] +0x02,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x58,0x7d] +0x04,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x58,0x7d] +0x01,0x05,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x59,0x7d] +0x01,0xfd,0x59,0x7d + +# GFX11: v_cmpx_nle_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x58,0x7d] +0xfe,0x05,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x58,0x7d] +0x6a,0x04,0x58,0x7d + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xac,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xac,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xac,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xac,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xac,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xac,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xac,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xac,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_nle_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x20] 
+0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nle_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nle_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xac,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e32 0, v2 ; encoding: [0x80,0x04,0x14,0x7d] +0x80,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x14,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x14,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x14,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x14,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x14,0x7d] +0xc1,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x14,0x7d] +0x7f,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x14,0x7d] +0x7e,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x14,0x7d] +0x7d,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x14,0x7d] +0x65,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x14,0x7d] +0x01,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x14,0x7d] +0x01,0x05,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x14,0x7d] +0x6b,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x14,0x7d] +0x6a,0x04,0x14,0x7d + +# GFX11: v_cmpx_nlg_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlg_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlg_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlg_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nlg_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8a,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x34,0x7d] +0xf0,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 0, v2 ; encoding: [0x80,0x04,0x34,0x7d] +0x80,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 0x3f717273, v2 ; encoding: 
[0xff,0x04,0x34,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x34,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nlg_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x34,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x34,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nlg_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x34,0x7d] +0xc1,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x34,0x7d] +0xf7,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x34,0x7d] +0x7f,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x34,0x7d] +0x7e,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x34,0x7d] +0x7d,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x34,0x7d] +0x65,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x34,0x7d] +0x01,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x35,0x7d] +0x01,0xff,0x35,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x34,0x7d] +0x01,0x05,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x34,0x7d] +0xff,0x05,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x34,0x7d] +0x6b,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x34,0x7d] +0x6a,0x04,0x34,0x7d + +# GFX11: v_cmpx_nlg_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlg_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlg_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlg_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nlg_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9a,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlg_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x54,0x7d] +0xf0,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x54,0x7d] +0x80,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x54,0x7d,0x73,0x72,0x71,0x3f] 
+0xff,0x04,0x54,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nlg_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x54,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x54,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nlg_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x54,0x7d] +0xc1,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x54,0x7d] +0xf7,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x54,0x7d] +0x7e,0x02,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x54,0x7d] +0x7e,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x54,0x7d] +0x64,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x54,0x7d] +0x02,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x54,0x7d] +0x04,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x54,0x7d] +0x01,0x05,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x55,0x7d] +0x01,0xfd,0x55,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x54,0x7d] +0xfe,0x05,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x54,0x7d] +0x6a,0x04,0x54,0x7d + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_nlg_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlg_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlg_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xaa,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e32 0, v2 ; encoding: [0x80,0x04,0x1c,0x7d] +0x80,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x1c,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x1c,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x1c,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1c,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x1c,0x7d] +0xc1,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x1c,0x7d] +0x7f,0x04,0x1c,0x7d + +# GFX11: 
v_cmpx_nlt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x1c,0x7d] +0x7e,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x1c,0x7d] +0x7d,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x1c,0x7d] +0x65,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x1c,0x7d] +0x01,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x1c,0x7d] +0x01,0x05,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x1c,0x7d] +0x6b,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x1c,0x7d] +0x6a,0x04,0x1c,0x7d + +# GFX11: v_cmpx_nlt_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlt_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlt_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlt_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nlt_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8e,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x3c,0x7d] +0xf0,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 0, v2 ; encoding: [0x80,0x04,0x3c,0x7d] +0x80,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x3c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nlt_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x3c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x3c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nlt_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x3c,0x7d] +0xc1,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x3c,0x7d] +0xf7,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x3c,0x7d] +0x7f,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x3c,0x7d] +0x7e,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x3c,0x7d] +0x7d,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x3c,0x7d] +0x65,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x3c,0x7d] +0x01,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x3d,0x7d] +0x01,0xff,0x3d,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 v1, v2 ; encoding: 
[0x01,0x05,0x3c,0x7d] +0x01,0x05,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x3c,0x7d] +0xff,0x05,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x3c,0x7d] +0x6b,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x3c,0x7d] +0x6a,0x04,0x3c,0x7d + +# GFX11: v_cmpx_nlt_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlt_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlt_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlt_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_nlt_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9e,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_nlt_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5c,0x7d] +0xf0,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x5c,0x7d] +0x80,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5c,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_nlt_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5c,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5c,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_nlt_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x5c,0x7d] +0xc1,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5c,0x7d] +0xf7,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x5c,0x7d] +0x7e,0x02,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x5c,0x7d] +0x7e,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5c,0x7d] +0x64,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5c,0x7d] +0x02,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5c,0x7d] +0x04,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5c,0x7d] +0x01,0x05,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 v[1:2], 
v[254:255] ; encoding: [0x01,0xfd,0x5d,0x7d] +0x01,0xfd,0x5d,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5c,0x7d] +0xfe,0x05,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x5c,0x7d] +0x6a,0x04,0x5c,0x7d + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xae,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xae,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xae,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xae,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xae,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xae,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xae,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xae,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_nlt_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_nlt_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_nlt_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xae,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e32 0, v2 ; encoding: [0x80,0x04,0x0e,0x7d] +0x80,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x0e,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x0e,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x0e,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x0e,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x0e,0x7d] +0xc1,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x0e,0x7d] +0x7f,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x0e,0x7d] +0x7e,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x0e,0x7d] +0x7d,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x0e,0x7d] +0x65,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x0e,0x7d] +0x01,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x0e,0x7d] +0x01,0x05,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x0e,0x7d] +0x6b,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x0e,0x7d] +0x6a,0x04,0x0e,0x7d + +# GFX11: v_cmpx_o_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x87,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x87,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xff,0x00,0x00 + +# 
GFX11: v_cmpx_o_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_o_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_o_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_o_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_o_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x87,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x2e,0x7d] +0xf0,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 0, v2 ; encoding: [0x80,0x04,0x2e,0x7d] +0x80,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x2e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x2e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_o_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x2e,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_o_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x2e,0x7d] +0xc1,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x2e,0x7d] +0xf7,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x2e,0x7d] +0x7f,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x2e,0x7d] +0x7e,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x2e,0x7d] +0x7d,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x2e,0x7d] +0x65,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x2e,0x7d] +0x01,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x2f,0x7d] +0x01,0xff,0x2f,0x7d + +# GFX11: v_cmpx_o_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x2e,0x7d] +0x01,0x05,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x2e,0x7d] +0xff,0x05,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x2e,0x7d] +0x6b,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x2e,0x7d] +0x6a,0x04,0x2e,0x7d + +# GFX11: v_cmpx_o_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x97,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x97,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xfd,0x00,0x00] 
+0x7e,0x00,0x97,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_o_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_o_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_o_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_o_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x97,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_o_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x4e,0x7d] +0xf0,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x4e,0x7d] +0x80,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x4e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x4e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_o_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x4e,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_o_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x4e,0x7d] +0xc1,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x4e,0x7d] +0xf7,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x4e,0x7d] +0x7e,0x02,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x4e,0x7d] +0x7e,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x4e,0x7d] +0x64,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x4e,0x7d] +0x02,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x4e,0x7d] +0x04,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x4e,0x7d] +0x01,0x05,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x4f,0x7d] +0x01,0xfd,0x4f,0x7d + +# GFX11: v_cmpx_o_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x4e,0x7d] +0xfe,0x05,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x4e,0x7d] +0x6a,0x04,0x4e,0x7d + +# GFX11: v_cmpx_o_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], s[4:5] ; encoding: 
[0x7e,0x00,0xa7,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_o_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_o_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_o_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa7,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e32 0, v2 ; encoding: [0x80,0x04,0x1e,0x7d] +0x80,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x1e,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x1e,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x1e,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x1e,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x1e,0x7d] +0xc1,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_t_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_t_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x8f,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_t_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x8f,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x1e,0x7d] +0x7f,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x1e,0x7d] +0x7e,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x1e,0x7d] +0x7d,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x1e,0x7d] +0x65,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x1e,0x7d] +0x01,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x1e,0x7d] +0x01,0x05,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x1e,0x7d] +0x6b,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x1e,0x7d] 
+0x6a,0x04,0x1e,0x7d + +# GFX11: v_cmpx_t_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x3e,0x7d] +0xf0,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 0, v2 ; encoding: [0x80,0x04,0x3e,0x7d] +0x80,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x3e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x3e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x3e,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x3e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x3e,0x7d] +0xc1,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x3e,0x7d] +0xf7,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_t_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_t_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_t_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x9f,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x3e,0x7d] +0x7f,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x3e,0x7d] +0x7e,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x3e,0x7d] +0x7d,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x3e,0x7d] +0x65,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x3e,0x7d] +0x01,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x3f,0x7d] +0x01,0xff,0x3f,0x7d + +# GFX11: v_cmpx_t_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x3e,0x7d] +0x01,0x05,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x3e,0x7d] +0xff,0x05,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x3e,0x7d] +0x6b,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x3e,0x7d] +0x6a,0x04,0x3e,0x7d + +# GFX11: v_cmpx_t_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x5e,0x7d] +0xf0,0x04,0x5e,0x7d 
+ +# GFX11: v_cmpx_t_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x5e,0x7d] +0x80,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x5e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_f64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0x5e,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x5e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x5e,0x7d] +0xc1,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x5e,0x7d] +0xf7,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_t_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_t_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xaf,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_t_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xaf,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xaf,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x5e,0x7d] +0x7e,0x02,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x5e,0x7d] +0x7e,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x5e,0x7d] +0x64,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x5e,0x7d] +0x02,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x5e,0x7d] +0x04,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x5e,0x7d] +0x01,0x05,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x5f,0x7d] +0x01,0xfd,0x5f,0x7d + +# GFX11: v_cmpx_t_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x5e,0x7d] +0xfe,0x05,0x5e,0x7d + +# GFX11: v_cmpx_t_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x5e,0x7d] +0x6a,0x04,0x5e,0x7d + +# GFX11: v_cmpx_t_i32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x8e,0x7d] +0xf0,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 0, v2 ; encoding: [0x80,0x04,0x8e,0x7d] +0x80,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x8e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x8e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_i32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x8e,0x7d,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x8e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_i32_e32 -1, v2 ; encoding: [0xc1,0x04,0x8e,0x7d] +0xc1,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x8e,0x7d] +0xf7,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x8e,0x7d] +0x7f,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x8e,0x7d] +0x7e,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 m0, v2 ; encoding: [0x7d,0x04,0x8e,0x7d] +0x7d,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 s101, v2 ; encoding: [0x65,0x04,0x8e,0x7d] +0x65,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 s1, v2 ; encoding: [0x01,0x04,0x8e,0x7d] +0x01,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 v1, v255 ; encoding: [0x01,0xff,0x8f,0x7d] +0x01,0xff,0x8f,0x7d + +# GFX11: v_cmpx_t_i32_e32 v1, v2 ; encoding: [0x01,0x05,0x8e,0x7d] +0x01,0x05,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 v255, v2 ; encoding: [0xff,0x05,0x8e,0x7d] +0xff,0x05,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x8e,0x7d] +0x6b,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x8e,0x7d] +0x6a,0x04,0x8e,0x7d + +# GFX11: v_cmpx_t_i32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, 0 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, -1 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, m0 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, s101 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, s2 ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_t_i32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xc7,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xc7,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_i64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xae,0x7d] +0xf0,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xae,0x7d] +0x80,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xae,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xae,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_i64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xae,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xae,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_i64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xae,0x7d] +0xc1,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xae,0x7d] +0xf7,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xae,0x7d] +0x7e,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xae,0x7d] +0x64,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 s[2:3], v[2:3] ; encoding: 
[0x02,0x04,0xae,0x7d] +0x02,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xae,0x7d] +0x04,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xae,0x7d] +0x01,0x05,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xaf,0x7d] +0x01,0xfd,0xaf,0x7d + +# GFX11: v_cmpx_t_i64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xae,0x7d] +0xfe,0x05,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xae,0x7d] +0x6a,0x04,0xae,0x7d + +# GFX11: v_cmpx_t_i64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_t_i64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xd7,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xd7,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x9e,0x7d] +0xf0,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 0, v2 ; encoding: [0x80,0x04,0x9e,0x7d] +0x80,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x9e,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x9e,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_u32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x9e,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x9e,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_u32_e32 -1, v2 ; encoding: [0xc1,0x04,0x9e,0x7d] +0xc1,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x9e,0x7d] +0xf7,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x9e,0x7d] +0x7f,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x9e,0x7d] +0x7e,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 m0, v2 ; encoding: [0x7d,0x04,0x9e,0x7d] +0x7d,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 s101, v2 ; encoding: [0x65,0x04,0x9e,0x7d] +0x65,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 s1, v2 ; encoding: [0x01,0x04,0x9e,0x7d] +0x01,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 v1, v255 ; encoding: [0x01,0xff,0x9f,0x7d] +0x01,0xff,0x9f,0x7d + +# GFX11: v_cmpx_t_u32_e32 v1, v2 ; encoding: [0x01,0x05,0x9e,0x7d] +0x01,0x05,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 v255, v2 ; encoding: [0xff,0x05,0x9e,0x7d] +0xff,0x05,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x9e,0x7d] +0x6b,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x9e,0x7d] +0x6a,0x04,0x9e,0x7d + +# GFX11: v_cmpx_t_u32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, 0 ; encoding: 
[0x7e,0x00,0xcf,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, -1 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, m0 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, s101 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, s2 ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_t_u32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0xcf,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xcf,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_t_u64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0xbe,0x7d] +0xf0,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 0, v[2:3] ; encoding: [0x80,0x04,0xbe,0x7d] +0x80,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0xbe,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_t_u64_e32 0xaf123456, v[2:3] ; encoding: [0xff,0x04,0xbe,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0xbe,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_t_u64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0xbe,0x7d] +0xc1,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0xbe,0x7d] +0xf7,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0xbe,0x7d] +0x7e,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0xbe,0x7d] +0x64,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0xbe,0x7d] +0x02,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0xbe,0x7d] +0x04,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0xbe,0x7d] +0x01,0x05,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0xbf,0x7d] +0x01,0xfd,0xbf,0x7d + +# GFX11: v_cmpx_t_u64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0xbe,0x7d] +0xfe,0x05,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0xbe,0x7d] +0x6a,0x04,0xbe,0x7d + +# GFX11: v_cmpx_t_u64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0xc9,0x00,0x00] 
+0x7e,0x00,0xdf,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_t_u64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xdf,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xdf,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e32 0, v2 ; encoding: [0x80,0x04,0x10,0x7d] +0x80,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 0x3456, v2 ; encoding: [0xff,0x04,0x10,0x7d,0x56,0x34,0x00,0x00] +0xff,0x04,0x10,0x7d,0x56,0x34,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e32 0xfe0b, v2 ; encoding: [0xff,0x04,0x10,0x7d,0x0b,0xfe,0x00,0x00] +0xff,0x04,0x10,0x7d,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d] +0xc1,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x10,0x7d] +0x7f,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x10,0x7d] +0x7e,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x10,0x7d] +0x7d,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 s101, v2 ; encoding: [0x65,0x04,0x10,0x7d] +0x65,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x10,0x7d] +0x01,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x10,0x7d] +0x01,0x05,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x10,0x7d] +0x6b,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x10,0x7d] +0x6a,0x04,0x10,0x7d + +# GFX11: v_cmpx_u_f16_e64 v1, 0 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x88,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, -1 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x88,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, m0 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, s101 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, s2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 -v1, v2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_u_f16_e64 v1, -v2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_u_f16_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_u_f16_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_u_f16_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x88,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e32 0.5, v2 ; encoding: [0xf0,0x04,0x30,0x7d] +0xf0,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 0, v2 ; encoding: [0x80,0x04,0x30,0x7d] +0x80,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 0x3f717273, v2 ; encoding: [0xff,0x04,0x30,0x7d,0x73,0x72,0x71,0x3f] 
+0xff,0x04,0x30,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_u_f32_e32 0xaf123456, v2 ; encoding: [0xff,0x04,0x30,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x30,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_u_f32_e32 -1, v2 ; encoding: [0xc1,0x04,0x30,0x7d] +0xc1,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 -4.0, v2 ; encoding: [0xf7,0x04,0x30,0x7d] +0xf7,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x30,0x7d] +0x7f,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x30,0x7d] +0x7e,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 m0, v2 ; encoding: [0x7d,0x04,0x30,0x7d] +0x7d,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 s101, v2 ; encoding: [0x65,0x04,0x30,0x7d] +0x65,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 s1, v2 ; encoding: [0x01,0x04,0x30,0x7d] +0x01,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 v1, v255 ; encoding: [0x01,0xff,0x31,0x7d] +0x01,0xff,0x31,0x7d + +# GFX11: v_cmpx_u_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x30,0x7d] +0x01,0x05,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 v255, v2 ; encoding: [0xff,0x05,0x30,0x7d] +0xff,0x05,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x30,0x7d] +0x6b,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x30,0x7d] +0x6a,0x04,0x30,0x7d + +# GFX11: v_cmpx_u_f32_e64 v1, 0.5 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, 0 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0x98,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, -1 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0x98,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, -4.0 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, exec_hi ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xff,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xff,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, exec_lo ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, m0 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xfb,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xfb,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, s101 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xcb,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xcb,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, s2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0x05,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 -v1, v2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_u_f32_e64 v1, -v2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_u_f32_e64 -v1, -v2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_u_f32_e64 v1, vcc_hi ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xd7,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xd7,0x00,0x00 + +# GFX11: v_cmpx_u_f32_e64 v1, vcc_lo ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0x98,0xd4,0x01,0xd5,0x00,0x00 + +# GFX11: v_cmpx_u_f64_e32 0.5, v[2:3] ; encoding: [0xf0,0x04,0x50,0x7d] +0xf0,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 0, v[2:3] ; encoding: [0x80,0x04,0x50,0x7d] +0x80,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 0x3f717273, v[2:3] ; encoding: [0xff,0x04,0x50,0x7d,0x73,0x72,0x71,0x3f] +0xff,0x04,0x50,0x7d,0x73,0x72,0x71,0x3f + +# GFX11: v_cmpx_u_f64_e32 0xaf123456, v[2:3] ; encoding: 
[0xff,0x04,0x50,0x7d,0x56,0x34,0x12,0xaf] +0xff,0x04,0x50,0x7d,0x56,0x34,0x12,0xaf + +# GFX11: v_cmpx_u_f64_e32 -1, v[2:3] ; encoding: [0xc1,0x04,0x50,0x7d] +0xc1,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 -4.0, v[2:3] ; encoding: [0xf7,0x04,0x50,0x7d] +0xf7,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 exec, v[1:2] ; encoding: [0x7e,0x02,0x50,0x7d] +0x7e,0x02,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 exec, v[2:3] ; encoding: [0x7e,0x04,0x50,0x7d] +0x7e,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 s[100:101], v[2:3] ; encoding: [0x64,0x04,0x50,0x7d] +0x64,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 s[2:3], v[2:3] ; encoding: [0x02,0x04,0x50,0x7d] +0x02,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 s[4:5], v[2:3] ; encoding: [0x04,0x04,0x50,0x7d] +0x04,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 v[1:2], v[2:3] ; encoding: [0x01,0x05,0x50,0x7d] +0x01,0x05,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 v[1:2], v[254:255] ; encoding: [0x01,0xfd,0x51,0x7d] +0x01,0xfd,0x51,0x7d + +# GFX11: v_cmpx_u_f64_e32 v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x50,0x7d] +0xfe,0x05,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e32 vcc, v[2:3] ; encoding: [0x6a,0x04,0x50,0x7d] +0x6a,0x04,0x50,0x7d + +# GFX11: v_cmpx_u_f64_e64 v[1:2], 0.5 ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0xe1,0x01,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0xe1,0x01,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], 0 ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x01,0x01,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0x01,0x01,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], -1 ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x83,0x01,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0x83,0x01,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], -4.0 ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0xef,0x01,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0xef,0x01,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], exec ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0xfd,0x00,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0xfd,0x00,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], s[100:101] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0xc9,0x00,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0xc9,0x00,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], s[4:5] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x09,0x00,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0x09,0x00,0x00 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], s[6:7] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x0d,0x00,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0x0d,0x00,0x00 + +# GFX11: v_cmpx_u_f64_e64 -v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x20] +0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x20 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x40] +0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x40 + +# GFX11: v_cmpx_u_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60] +0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60 + +# GFX11: v_cmpx_u_f64_e64 v[1:2], vcc ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0xd5,0x00,0x00] +0x7e,0x00,0xa8,0xd4,0x01,0xd5,0x00,0x00 + +# Check that dst value does not affect disassembly +# GFX11: v_cmpx_u_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60] +0x00,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60 + +# Check that dst value does not affect disassembly +# GFX11: v_cmpx_u_f64_e64 -v[1:2], -v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60] +0xff,0x00,0xa8,0xd4,0x01,0x05,0x02,0x60 + +# W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0x00] +# W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0x00] 
+0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0x00 + +# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0x00 + +# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x64,0x01,0x1b,0x04,0x00 + +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0x00 + +# GFX11: v_add_f32_dpp v5, -v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x10,0x00] +0xfa,0x04,0x0a,0x06,0x01,0x1b,0x10,0x00 + +# GFX11: v_add_f32_dpp v5, v1, -v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x40,0x00] +0xfa,0x04,0x0a,0x06,0x01,0x1b,0x40,0x00 + +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x06,0x01,0x1b,0x04,0x00 + +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00 + +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x04,0x00 + +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0x00 + +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x36,0x01,0x1b,0x04,0x00 + +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0x00 + +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x34,0x01,0x1b,0x04,0x00 + +# GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_ceil_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_ceil_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_ceil_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_ceil_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cls_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cls_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x76,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_ctz_i32_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_ctz_i32_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x74,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_clz_i32_u32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_clz_i32_u32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x72,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cos_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cos_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xc2,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cos_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cos_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x6c,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f16_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f16_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f16_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f16_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f16_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f16_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xa0,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_u32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# 
GFX11: v_cvt_f32_u32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x0c,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_ubyte0_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x22,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_ubyte1_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x24,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_ubyte2_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x26,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_f32_ubyte3_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x28,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_floor_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x1a,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xa6,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x10,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_nearest_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x18,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_norm_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xc6,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_norm_u16_f16_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xc8,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_off_f32_i4_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x1c,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xa4,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_cvt_u32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_cvt_u32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x0e,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_exp_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_exp_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_exp_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_exp_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x4a,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_floor_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_floor_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_floor_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_floor_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0x00 + +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x04,0x00 + +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0x00 + +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x04,0x00] 
+0xfa,0x04,0x0a,0x56,0x01,0x1b,0x04,0x00 + +# GFX11: v_fract_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_fract_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xbe,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_fract_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_fract_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x40,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_frexp_exp_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xb4,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_frexp_exp_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x7e,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_frexp_mant_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_frexp_mant_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xb2,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_frexp_mant_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_frexp_mant_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x80,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0x00 + +# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x76,0x01,0x1b,0x04,0x00 + +# GFX11: v_log_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_log_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xae,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_log_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_log_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x4e,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0x00 + +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x30,0x01,0x1b,0x04,0x00 + +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0x00 + +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x32,0x01,0x1b,0x04,0x00 + +# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0x00 + +# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x72,0x01,0x1b,0x04,0x00 + +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0x00 + +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x20,0x01,0x1b,0x04,0x00 + +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0x00 + +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x24,0x01,0x1b,0x04,0x00 + +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0x00 + +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x28,0x01,0x1b,0x04,0x00 + +# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0x00 + +# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x74,0x01,0x1b,0x04,0x00 + +# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0x00 + +# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x04,0x00 + +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0x00 + +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x22,0x01,0x1b,0x04,0x00 + +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0x00 + +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x26,0x01,0x1b,0x04,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: 
[0xfa,0x02,0x0a,0x7e,0x01,0xe4,0x00,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0xe4,0x00,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x01] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x01 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x03] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x03 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x0f] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x0f + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x10] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x10 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x30] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x30 + +# GFX11: v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xf0] +0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0xf0 + +# GFX11: v_mov_b32_dpp v5, v1 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x41,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x41,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x40,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x40,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x2f,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x2f,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x21,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x21,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_share:0 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x50,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x50,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x5f,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x5f,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x0f,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x0f,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x01,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x01,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x1f,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x1f,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x11,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x11,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_xmask:0 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x60,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x60,0x01,0x00 + +# GFX11: v_mov_b32_dpp v5, v1 row_xmask:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x02,0x0a,0x7e,0x01,0x6f,0x01,0x00] +0xfa,0x02,0x0a,0x7e,0x01,0x6f,0x01,0x00 + +# GFX11: v_movreld_b32_dpp v1, v0 quad_perm:[3,2,1,0] 
row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00] +0xfa,0x84,0x02,0x7e,0x00,0x1b,0x00,0x00 + +# GFX11: v_movrels_b32_dpp v1, v0 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00] +0xfa,0x86,0x02,0x7e,0x00,0x1b,0x04,0x00 + +# GFX11: v_movrelsd_2_b32_dpp v0, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00] +0xfa,0x90,0x00,0x7e,0x02,0x1b,0x00,0x00 + +# GFX11: v_movrelsd_b32_dpp v0, v255 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00] +0xfa,0x88,0x00,0x7e,0xff,0x1b,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x10,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x14,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x18,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x12,0x01,0x1b,0x04,0x00 + +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0x00 + +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x16,0x01,0x1b,0x04,0x00 + +# GFX11: v_not_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_not_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: 
[0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x6e,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0x00 + +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x38,0x01,0x1b,0x04,0x00 + +# GFX11: v_rcp_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rcp_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xa8,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rcp_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rcp_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x54,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rcp_iflag_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x56,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rndne_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rndne_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rndne_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rndne_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rsq_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rsq_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xac,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_rsq_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_rsq_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x5c,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] 
row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_sin_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_sin_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x6a,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_sqrt_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_sqrt_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xaa,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_sqrt_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_sqrt_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x66,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0x00] +# W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0x00 + +# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0x00 + +# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x66,0x01,0x1b,0x04,0x00 + +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0x00 + +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x08,0x01,0x1b,0x04,0x00 + +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x04,0x00 + +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0x00 + +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0x00 + +# W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0x00] +# W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0x00 + +# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0x00 + +# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x68,0x01,0x1b,0x04,0x00 + +# GFX11: 
v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0x00 + +# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x04,0x00 + +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x04,0x00 + +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0x10] +0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0x10 + +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x6f,0x01,0x00] +0xfa,0x04,0x0a,0x4e,0x01,0x6f,0x01,0x00 + +# GFX11: v_trunc_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_trunc_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_trunc_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0x00] +0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0x00 + +# GFX11: v_trunc_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x04,0x00] +0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x04,0x00 + +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0x00 + +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x04,0x00 + +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0x00 + +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x04,0x00] +0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x04,0x00 + +# GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x64,0x01,0x88,0xc6,0xfa + +# GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x64,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x64,0x01,0x88,0xc6,0xfa + +# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x06,0x01,0x88,0xc6,0xfa + +# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x06,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x06,0x01,0x88,0xc6,0xfa + +# GFX11: v_add_nc_u32_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0b,0x4a,0x01,0x77,0x39,0x05] +0xe9,0xfe,0x0b,0x4a,0x01,0x77,0x39,0x05 + +# GFX11: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] +0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05 + +# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x36,0x01,0x88,0xc6,0xfa + +# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: 
[0xea,0x04,0x0a,0x36,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x36,0x01,0x88,0xc6,0xfa + +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x34,0x01,0x88,0xc6,0xfa + +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x34,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x34,0x01,0x88,0xc6,0xfa + +# GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x70,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ceil_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ceil_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xb8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ceil_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ceil_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x44,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cls_i32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cls_i32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x76,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ctz_i32_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ctz_i32_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x74,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_clz_i32_u32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_clz_i32_u32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x72,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cos_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cos_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xc2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cos_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cos_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x6c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x14,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_i16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_i16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xa2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_u16_dpp v5, v1 
dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f16_u16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xa0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x16,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_i32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_i32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x0a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_u32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_u32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x0c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte0_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte0_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x22,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte1_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte1_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x24,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte2_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte2_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x26,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte3_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_f32_ubyte3_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x28,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_floor_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_floor_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x1a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xa6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x10,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_nearest_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: 
[0xe9,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_nearest_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x18,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_norm_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_norm_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xc6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_norm_u16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_norm_u16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xc8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_off_f32_i4_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_off_f32_i4_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x1c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_u16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_u16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xa4,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_u32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_cvt_u32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x0e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_exp_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_exp_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xb0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_exp_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_exp_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x4a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_floor_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_floor_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xb6,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_floor_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_floor_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x48,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_fract_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_fract_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xbe,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_fract_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa + 
+# GFX11: v_fract_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x40,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_exp_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_exp_i16_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xb4,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_exp_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_exp_i32_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x7e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_mant_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_mant_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xb2,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_mant_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_frexp_mant_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x80,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_ldexp_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa + +# GFX11: v_ldexp_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa + +# GFX11: v_log_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_log_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xae,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_log_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_log_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x4e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x30,0x01,0x88,0xc6,0xfa + +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x30,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x30,0x01,0x88,0xc6,0xfa + +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x32,0x01,0x88,0xc6,0xfa + +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x32,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x32,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x72,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x72,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x72,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x20,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_f32_dpp v5, v1, v2 
dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x20,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x20,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x24,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x24,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x24,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x28,0x01,0x88,0xc6,0xfa + +# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x28,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x28,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x74,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x74,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x74,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x1e,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x1e,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x1e,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x22,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x22,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x22,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x26,0x01,0x88,0xc6,0xfa + +# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x26,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x26,0x01,0x88,0xc6,0xfa + +# GFX11: v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x0e,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x0e,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x0e,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x6a,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x6a,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x6a,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x10,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x10,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x10,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x14,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: 
[0xea,0x04,0x0a,0x14,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x14,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x18,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x18,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x18,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x12,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x12,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x12,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x16,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x16,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x16,0x01,0x88,0xc6,0xfa + +# GFX11: v_not_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_not_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x6e,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa + +# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xa8,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x54,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_iflag_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rcp_iflag_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x56,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rndne_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rndne_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xbc,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rndne_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rndne_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x46,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rsq_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rsq_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xac,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: 
v_rsq_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_rsq_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x5c,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sin_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sin_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xc0,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sin_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sin_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x6a,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sqrt_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sqrt_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xaa,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sqrt_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sqrt_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x66,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_sub_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x66,0x01,0x88,0xc6,0xfa + +# GFX11: v_sub_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x66,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x66,0x01,0x88,0xc6,0xfa + +# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x08,0x01,0x88,0xc6,0xfa + +# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x08,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x08,0x01,0x88,0xc6,0xfa + +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] +0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x68,0x01,0x88,0xc6,0xfa + +# GFX11: v_subrev_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x68,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x68,0x01,0x88,0xc6,0xfa + +# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x0a,0x01,0x88,0xc6,0xfa + +# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x0a,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x0a,0x01,0x88,0xc6,0xfa + +# GFX11: v_subrev_nc_u32_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0b,0x4e,0x01,0x77,0x39,0x05] +0xe9,0xfe,0x0b,0x4e,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] +0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: 
[0xe9,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_trunc_f16_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0xba,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_trunc_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xe9,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_trunc_f32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa] +0xea,0x42,0x0a,0x7e,0x01,0x88,0xc6,0xfa + +# GFX11: v_xnor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x3c,0x01,0x88,0xc6,0xfa + +# GFX11: v_xnor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x3c,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x3c,0x01,0x88,0xc6,0xfa + +# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa] +0xe9,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa + +# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa + +# W32: v_add_co_ci_u32_e64_dpp v0, s2, v1, v2, s1 clamp dpp8:[7,6,5,3,4,2,1,0] fi:1 ; encoding: [0x00,0x82,0x20,0xd5,0xea,0x04,0x06,0x00,0x01,0x77,0x47,0x05] +# W64: v_add_co_ci_u32_e64_dpp v0, s[2:3], v1, v2, s[0:1] clamp dpp8:[7,6,5,3,4,2,1,0] fi:1 ; encoding: [0x00,0x82,0x20,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x47,0x05] +0x00,0x82,0x20,0xd5,0xea,0x04,0x06,0x00,0x01,0x77,0x47,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v0, s5, v1, v2, vcc_hi quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0x05,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x78,0x00,0x0f] +0x00,0x05,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x78,0x00,0x0f + +# W32: v_add_co_u32_e64_dpp v5, s4, v1, v2 clamp quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x05,0x84,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x7a,0x0c,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[4:5], v1, v2 clamp quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x05,0x84,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x7a,0x0c,0xff] +0x05,0x84,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x7a,0x0c,0xff + +# GFX11: v_add_nc_u32_e64_dpp v60, v61, v62 dpp8:[7,6,5,3,4,2,1,0] fi:1 ; encoding: [0x3c,0x00,0x25,0xd5,0xea,0x7c,0x02,0x00,0x3d,0x77,0x47,0x05] +0x3c,0x00,0x25,0xd5,0xea,0x7c,0x02,0x00,0x3d,0x77,0x47,0x05 + +# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_bfe_u32_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00] +0x00,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x00 + +# GFX11: v_cls_i32_e64_dpp v5, v1 dpp8:[0,0,2,3,4,4,6,7] ; encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x80,0x46,0xfa] +0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x80,0x46,0xfa + +# W32: v_cndmask_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v5, v1, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0a,0x00] +0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x00 + +# W32: v_cndmask_b16 v255, v1, v2, s3 ; encoding: [0xff,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v255, v1, v2, s[2:3] ; encoding: [0xff,0x00,0x5d,0xd6,0x01,0x05,0x0a,0x00] +0xff,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x00 
+ +# W32: v_cndmask_b16 v5, v255, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v5, v255, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x05,0x0a,0x00] +0x05,0x00,0x5d,0xd6,0xff,0x05,0x0e,0x00 + +# W32: v_cndmask_b16 v5, vcc_lo, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6a,0x04,0x0e,0x00] +# W64: v_cndmask_b16 v5, vcc_lo, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x6a,0x04,0x0a,0x00] +0x05,0x00,0x5d,0xd6,0x6a,0x04,0x0e,0x00 + +# W32: v_cndmask_b16 v5, 0, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x80,0x04,0x0e,0x00] +# W64: v_cndmask_b16 v5, 0, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x80,0x04,0x0a,0x00] +0x05,0x00,0x5d,0xd6,0x80,0x04,0x0e,0x00 + +# W32: v_cndmask_b16 v5, -1, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xc1,0x04,0x0e,0x00] +# W64: v_cndmask_b16 v5, -1, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0xc1,0x04,0x0a,0x00] +0x05,0x00,0x5d,0xd6,0xc1,0x04,0x0e,0x00 + +# W32: v_cndmask_b16 v5, v1, v255, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xff,0x0f,0x00] +# W64: v_cndmask_b16 v5, v1, v255, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xff,0x0b,0x00] +0x05,0x00,0x5d,0xd6,0x01,0xff,0x0f,0x00 + +# W32: v_cndmask_b16 v5, v1, vcc_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xd5,0x0c,0x00] +# W64: v_cndmask_b16 v5, v1, vcc_lo, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xd5,0x08,0x00] +0x05,0x00,0x5d,0xd6,0x01,0xd5,0x0c,0x00 + +# W32: v_cndmask_b16 v5, v1, 0, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x01,0x0d,0x00] +# W64: v_cndmask_b16 v5, v1, 0, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x01,0x09,0x00] +0x05,0x00,0x5d,0xd6,0x01,0x01,0x0d,0x00 + +# W32: v_cndmask_b16 v5, v1, -1, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x83,0x0d,0x00] +# W64: v_cndmask_b16 v5, v1, -1, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x83,0x09,0x00] +0x05,0x00,0x5d,0xd6,0x01,0x83,0x0d,0x00 + +# W32: v_cndmask_b16 v5, v1, v2, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xa6,0x01] +# W64: v_cndmask_b16 v5, v1, v2, s[104:105] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xa2,0x01] +0x05,0x00,0x5d,0xd6,0x01,0x05,0xa6,0x01 + +# W32: v_cndmask_b16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xaa,0x01] +# W64: v_cndmask_b16 v5, v1, v2, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xaa,0x01] +0x05,0x00,0x5d,0xd6,0x01,0x05,0xaa,0x01 + +# W32: v_cndmask_b16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xae,0x01] +0x05,0x00,0x5d,0xd6,0x01,0x05,0xae,0x01 + +# W32: v_cndmask_b16 v5, v1, v2, ttmp15 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xee,0x01] +# W64: v_cndmask_b16 v5, v1, v2, ttmp[14:15] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0xea,0x01] +0x05,0x00,0x5d,0xd6,0x01,0x05,0xee,0x01 + +# W32: v_cndmask_b16 v5, -v1, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x20] +# W64: v_cndmask_b16 v5, -v1, v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0a,0x20] +0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x20 + +# W32: v_cndmask_b16 v5, v1, -v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x40] +# W64: v_cndmask_b16 v5, v1, -v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0a,0x40] +0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x40 + +# W32: v_cndmask_b16 v5, -v1, -v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x60] +# W64: v_cndmask_b16 v5, -v1, -v2, s[2:3] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0x05,0x0a,0x60] +0x05,0x00,0x5d,0xd6,0x01,0x05,0x0e,0x60 + +# W32: v_cndmask_b16 v5, |v1|, v2, s3 ; encoding: [0x05,0x01,0x5d,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v5, |v1|, v2, s[2:3] ; encoding: 
[0x05,0x01,0x5d,0xd6,0x01,0x05,0x0a,0x00] +0x05,0x01,0x5d,0xd6,0x01,0x05,0x0e,0x00 + +# W32: v_cndmask_b16 v5, v1, |v2|, s3 ; encoding: [0x05,0x02,0x5d,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v5, v1, |v2|, s[2:3] ; encoding: [0x05,0x02,0x5d,0xd6,0x01,0x05,0x0a,0x00] +0x05,0x02,0x5d,0xd6,0x01,0x05,0x0e,0x00 + +# W32: v_cndmask_b16 v5, |v1|, |v2|, s3 ; encoding: [0x05,0x03,0x5d,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_cndmask_b16 v5, |v1|, |v2|, s[2:3] ; encoding: [0x05,0x03,0x5d,0xd6,0x01,0x05,0x0a,0x00] +0x05,0x03,0x5d,0xd6,0x01,0x05,0x0e,0x00 + +# GFX11: v_cndmask_b16_e64_dpp v0, v1, v2, null quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x5d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x0e,0x00,0xff] +0x00,0x00,0x5d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x0e,0x00,0xff + +# W32: v_cndmask_b32_e64_dpp v202, v1, v2, s1 quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xca,0x00,0x01,0xd5,0xfa,0x04,0x06,0x00,0x01,0x0e,0x04,0xff] +# W64: v_cndmask_b32_e64_dpp v202, v1, v2, s[0:1] quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xca,0x00,0x01,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0e,0x04,0xff] +0xca,0x00,0x01,0xd5,0xfa,0x04,0x06,0x00,0x01,0x0e,0x04,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v199, v1 dpp8:[0,0,2,3,4,4,6,7] fi:1 ; encoding: [0xc7,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x80,0x46,0xfa] +0xc7,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x80,0x46,0xfa + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 div:2 dpp8:[0,2,1,3,4,5,6,7] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x18,0x01,0x50,0xc6,0xfa] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x18,0x01,0x50,0xc6,0xfa + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 clamp row_shl:7 row_mask:0x0 bank_mask:0x0 ; encoding: [0x05,0x80,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x01,0x00] +0x05,0x80,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x01,0x00 + +# GFX11: v_floor_f32_e64_dpp v5, v1 clamp row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x05,0x80,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x05,0x00] +0x05,0x80,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x07,0x05,0x00 + +# GFX11: v_fma_f32_e64_dpp v80, v81, v82, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] +0x50,0x00,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa + +# GFX11: v_fma_f32_e64_dpp v80, v81, |v82|, v81 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] +0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa + +# GFX11: v_fma_f32_e64_dpp v93, |v94|, v95, v94 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe] +0x5d,0x01,0x13,0xd6,0xfa,0xbe,0x7a,0x05,0x5e,0x1b,0x00,0xfe + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 quad_perm:[1,3,1,0] row_mask:0x7 bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x1d,0x00,0x7f] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x1d,0x00,0x7f + +# GFX11: v_lshl_or_b32_e64_dpp v255, v5, v0, vcc_hi row_xmask:6 row_mask:0x0 bank_mask:0xf fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f] +0xff,0x00,0x56,0xd6,0xfa,0x00,0xae,0x01,0x05,0x66,0x05,0x0f + +# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x80,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_maxmin_f32_e64_dpp v0, v1, v2, v3 div:2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] 
+0x00,0x00,0x5e,0xd6,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v126, v2 row_half_mirror row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x7e,0x41,0x09,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x7e,0x41,0x09,0xff + +# GFX11: v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa] +0x00,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa + +# GFX11: v_minmax_f32_e64_dpp v0, |v1|, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa] +0x00,0x01,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0xc6,0xfa + +# GFX11: v_mul_f16_e64_dpp v0, v2, v4 row_share:10 row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x00,0x00,0x35,0xd5,0xfa,0x08,0x02,0x00,0x02,0x5a,0x09,0xff] +0x00,0x00,0x35,0xd5,0xfa,0x08,0x02,0x00,0x02,0x5a,0x09,0xff + +# GFX11: v_mul_f32_e64_dpp v0, v1, v2 dpp8:[0,1,2,3,1,5,6,7] ; encoding: [0x00,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x88,0x96,0xfa] +0x00,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x88,0x96,0xfa + +# GFX11: v_mul_i32_i24_e64_dpp v208, v101, v4 clamp row_shr:14 row_mask:0x3 bank_mask:0xa bound_ctrl:1 ; encoding: [0xd0,0x80,0x09,0xd5,0xfa,0x08,0x02,0x00,0x65,0x1e,0x09,0x3a] +0xd0,0x80,0x09,0xd5,0xfa,0x08,0x02,0x00,0x65,0x1e,0x09,0x3a + +# GFX11: v_sat_pk_u8_i16_e64_dpp v0, v2 row_mirror row_mask:0xf bank_mask:0x2 fi:1 ; encoding: [0x00,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x02,0x40,0x05,0xf2] +0x00,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x02,0x40,0x05,0xf2 + +# GFX11: v_sub_nc_i32_e64_dpp v93, v94, v95 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1] +0x5d,0x00,0x25,0xd7,0xfa,0xbe,0x02,0x00,0x5e,0x27,0x09,0xf1 + +# W32: v_subrev_co_ci_u32_e64_dpp v0, vcc_lo, v1, v2, vcc_lo quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x7a,0x00,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v0, vcc, v1, v2, vcc quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x7a,0x00,0xff] +0x00,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x7a,0x00,0xff + +# GFX11: v_subrev_co_u32_e64_dpp v5, null, v1, v2 dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x05,0x7c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92] +0x05,0x7c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92 + +# W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92] +# W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92] +0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x92,0x44,0x92 + +# GFX11: v_xnor_b32_e64_dpp v8, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x08,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x08,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_xnor_b32_e64_dpp v8, v5, v2 quad_perm:[1,0,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0x08,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x05,0xe1,0x00,0x10] +0x08,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x05,0xe1,0x00,0x10 + +# GFX11: v_dot2_f32_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x13,0xcc,0x01,0x05,0x0e,0x1c + +# GFX11: v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x13,0xcc,0x01,0x05,0x0e,0x7c] 
+0x00,0x45,0x13,0xcc,0x01,0x05,0x0e,0x7c + +# GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x1a,0xcc,0x01,0x05,0x0e,0x1c + +# GFX11: v_dot2_f32_bf16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[1,0,1] ; encoding: [0x00,0x45,0x1a,0xcc,0x01,0x05,0x0e,0x3c] +0x00,0x45,0x1a,0xcc,0x01,0x05,0x0e,0x3c + +# GFX11: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0x20,0xcc,0x01,0x05,0x0e,0x04] +0x00,0x00,0x20,0xcc,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0x20,0xcc,0x01,0x05,0x0e,0x04] +0x00,0x20,0x20,0xcc,0x01,0x05,0x0e,0x04 + +# GFX11: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp ; encoding: [0x00,0xc0,0x22,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0xc0,0x22,0xcc,0x01,0x05,0x0e,0x1c + +# GFX11: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44] +0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44 + +# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05] +0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05 + +# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x05,0x13,0xcc,0xfa,0x04,0x0e,0x64,0x01,0x1b,0x00,0xfe] +0x00,0x05,0x13,0xcc,0xfa,0x04,0x0e,0x64,0x01,0x1b,0x00,0xfe + +# GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x00,0x00,0x13,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x7a,0x0c,0xff] +0x00,0x00,0x13,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x7a,0x0c,0xff + +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00 + +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00] +0xfa,0x04,0x0a,0x04,0x01,0xe4,0x04,0x00 + +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 + +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] +0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 + +# GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 clamp dpp8:[2,2,2,2,4,4,4,4] fi:1 ; encoding: [0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92] +0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92 + +# GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x00,0x00,0x20,0xcc,0xe9,0x04,0x0e,0x04,0x01,0x92,0x44,0x92] +0x00,0x00,0x20,0xcc,0xe9,0x04,0x0e,0x04,0x01,0x92,0x44,0x92 + +# GFX11: v_fma_mix_f32_e64_dpp v0, v1, v2, v3 row_ror:7 row_mask:0xf bank_mask:0x1 bound_ctrl:1 ; encoding: [0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1] +0x00,0x00,0x20,0xcc,0xfa,0x04,0x0e,0x04,0x01,0x27,0x09,0xf1 + +# GFX11: v_fma_mixhi_f16_e64_dpp v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f] +0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f + +# GFX11: v_fma_mixlo_f16_e64_dpp v0, |v1|, -v2, |v3| dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x00,0x05,0x21,0xcc,0xe9,0x04,0x0e,0x44,0x01,0x92,0x44,0x92] +0x00,0x05,0x21,0xcc,0xe9,0x04,0x0e,0x44,0x01,0x92,0x44,0x92 + +# GFX11: 
v_fma_mixlo_f16_e64_dpp v0, |v1|, -v2, |v3| op_sel:[1,0,0] op_sel_hi:[1,0,0] dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92] +0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92 + +# W32: v_cmp_class_f32_e64_dpp s10, v10, v2 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x88,0xc7,0xfa] +# W64: v_cmp_class_f32_e64_dpp s[10:11], v10, v2 dpp8:[0,1,6,3,4,5,6,7] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x88,0xc7,0xfa] +0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x88,0xc7,0xfa + +# W32: v_cmp_class_f32_e64_dpp s10, v10, v2 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x7a,0x0c,0xff] +# W64: v_cmp_class_f32_e64_dpp s[10:11], v10, v2 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x7a,0x0c,0xff] +0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x7a,0x0c,0xff + +# W32: v_cmp_f_f32 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x20,0x7c,0x01,0x07,0x01,0x00] +# W64: v_cmp_f_f32 vcc, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x20,0x7c,0x01,0x07,0x01,0x00] +0xfa,0x04,0x20,0x7c,0x01,0x07,0x01,0x00 + +# W32: v_cmp_ge_i16_e64_dpp s10, v10, v2 quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x78,0x00,0x0f] +# W64: v_cmp_ge_i16_e64_dpp s[10:11], v10, v2 quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x78,0x00,0x0f] +0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x78,0x00,0x0f + +# W32: v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[1,3,1,0] row_mask:0x7 bank_mask:0xf ; encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1d,0x00,0x7f] +# W64: v_cmp_gt_i16 vcc, v1, v2 quad_perm:[1,3,1,0] row_mask:0x7 bank_mask:0xf ; encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1d,0x00,0x7f] +0xfa,0x04,0x68,0x7c,0x01,0x1d,0x00,0x7f + +# W32: v_cmp_gt_i32_e64_dpp s10, v1, v50 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x0a,0x00,0x44,0xd4,0xea,0x64,0x02,0x00,0x01,0x88,0xc6,0xfa] +# W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v50 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x0a,0x00,0x44,0xd4,0xea,0x64,0x02,0x00,0x01,0x88,0xc6,0xfa] +0x0a,0x00,0x44,0xd4,0xea,0x64,0x02,0x00,0x01,0x88,0xc6,0xfa + +# W32: v_cmp_gt_i32 vcc_lo, v1, v255 row_mirror row_mask:0xf bank_mask:0x2 fi:1 ; encoding: [0xfa,0xfe,0x89,0x7c,0x01,0x40,0x05,0xf2] +# W64: v_cmp_gt_i32 vcc, v1, v255 row_mirror row_mask:0xf bank_mask:0x2 fi:1 ; encoding: [0xfa,0xfe,0x89,0x7c,0x01,0x40,0x05,0xf2] +0xfa,0xfe,0x89,0x7c,0x01,0x40,0x05,0xf2 + +# W32: v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] +# W64: v_cmp_gt_u16 vcc, v1, v2 row_shl:7 row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00] +0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00 + +# W32: v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] ; encoding: [0xe9,0x04,0x66,0x7c,0x01,0xff,0x47,0xfa] +# W64: v_cmp_le_i16 vcc, v1, v2 dpp8:[7,7,7,3,4,4,6,7] ; encoding: [0xe9,0x04,0x66,0x7c,0x01,0xff,0x47,0xfa] +0xe9,0x04,0x66,0x7c,0x01,0xff,0x47,0xfa + +# W32: v_cmp_le_i32_e64_dpp s10, v1, v50 quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x64,0x02,0x00,0x01,0x78,0x00,0x0f] +# W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v50 quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: 
[0x0a,0x00,0x43,0xd4,0xfa,0x64,0x02,0x00,0x01,0x78,0x00,0x0f] +0x0a,0x00,0x43,0xd4,0xfa,0x64,0x02,0x00,0x01,0x78,0x00,0x0f + +# W32: v_cmp_le_i32 vcc_lo, v1, v255 dpp8:[0,2,1,3,4,5,6,7] ; encoding: [0xe9,0xfe,0x87,0x7c,0x01,0x50,0xc6,0xfa] +# W64: v_cmp_le_i32 vcc, v1, v255 dpp8:[0,2,1,3,4,5,6,7] ; encoding: [0xe9,0xfe,0x87,0x7c,0x01,0x50,0xc6,0xfa] +0xe9,0xfe,0x87,0x7c,0x01,0x50,0xc6,0xfa + +# W32: v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] +# W64: v_cmp_le_u16 vcc, v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1 ; encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa] +0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa + +# W32: v_cmp_lt_f32 vcc_lo, v1, -v2 quad_perm:[0,1,2,2] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7c,0x01,0xa4,0x40,0xff] +# W64: v_cmp_lt_f32 vcc, v1, -v2 quad_perm:[0,1,2,2] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7c,0x01,0xa4,0x40,0xff] +0xfa,0x04,0x22,0x7c,0x01,0xa4,0x40,0xff + +# W32: v_cmp_lt_i16_e64_dpp s10, v10, v2 dpp8:[7,6,5,3,4,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x77,0x47,0x05] +# W64: v_cmp_lt_i16_e64_dpp s[10:11], v10, v2 dpp8:[7,6,5,3,4,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x77,0x47,0x05] +0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x77,0x47,0x05 + +# W32: v_cmp_ngt_f32_e64_dpp s10, -v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x0a,0x00,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x88,0xc6,0xfa] +# W64: v_cmp_ngt_f32_e64_dpp s[10:11], -v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x0a,0x00,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x88,0xc6,0xfa] +0x0a,0x00,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x88,0xc6,0xfa + +# W32: v_cmp_nle_f32_e64_dpp s10, -v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x1b,0x00,0x00] +# W64: v_cmp_nle_f32_e64_dpp s[10:11], -v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x1b,0x00,0x00] +0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x1b,0x00,0x00 + +# W32: v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[0,2,1,3,4,5,6,7] ; encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x50,0xc6,0xfa] +# W64: v_cmp_t_f32 vcc, v1, v2 dpp8:[0,2,1,3,4,5,6,7] ; encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x50,0xc6,0xfa] +0xe9,0x04,0x3e,0x7c,0x01,0x50,0xc6,0xfa + +# GFX11: v_cmpx_class_f16 v12, v101 dpp8:[7,6,5,3,4,2,1,0] ; encoding: [0xe9,0xca,0xfa,0x7d,0x0c,0x77,0x47,0x05] +0xe9,0xca,0xfa,0x7d,0x0c,0x77,0x47,0x05 + +# GFX11: v_cmpx_class_f16 v12, v101 quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x04,0xff] +0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x04,0xff + +# GFX11: v_cmpx_class_f16 |v12|, v101 quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x20,0xff] +0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x20,0xff + +# GFX11: v_cmpx_f_f32 v255, v2 quad_perm:[2,3,0,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x20,0x7d,0xff,0x0e,0x00,0xff] +0xfa,0x04,0x20,0x7d,0xff,0x0e,0x00,0xff + +# GFX11: v_cmpx_f_i32 v0, v2 dpp8:[7,6,5,3,4,2,1,0] fi:1 ; encoding: [0xea,0x04,0x80,0x7d,0x00,0x77,0x47,0x05] +0xea,0x04,0x80,0x7d,0x00,0x77,0x47,0x05 + +# GFX11: v_cmpx_t_f32 v255, v2 dpp8:[7,6,5,3,4,2,1,0] ; encoding: [0xe9,0x04,0x3e,0x7d,0xff,0x77,0x47,0x05] +0xe9,0x04,0x3e,0x7d,0xff,0x77,0x47,0x05 + +# GFX11: v_cmpx_t_i32 v0, v2 row_shr:14 row_mask:0x3 bank_mask:0xa bound_ctrl:1 ; encoding: [0xfa,0x04,0x8e,0x7d,0x00,0x1e,0x09,0x3a] +0xfa,0x04,0x8e,0x7d,0x00,0x1e,0x09,0x3a + +# GFX11: 
v_permlane64_b32 v5, v1 ; encoding: [0x01,0xcf,0x0a,0x7e] +0x01,0xcf,0x0a,0x7e + +# GFX11: v_permlane64_b32 v255, v1 ; encoding: [0x01,0xcf,0xfe,0x7f] +0x01,0xcf,0xfe,0x7f + +# GFX11: v_permlane64_b32 v5, v255 ; encoding: [0xff,0xcf,0x0a,0x7e] +0xff,0xcf,0x0a,0x7e + +# GFX11: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_and_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_and_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x62,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_and_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, s105, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x69,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, ttmp15, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x7b,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, 0x3800, v2 +0x05,0x00,0x62,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, 0xc400, v2 +0x05,0x00,0x62,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, src_scc, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0x04,0x02,0x00] +0x05,0x00,0x62,0xd7,0xfd,0x04,0x02,0x00 + +# GFX11: v_and_b16 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x62,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_and_b16 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x62,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x62,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_and_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xd3,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xf7,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xf7,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, exec_lo ; encoding: 
[0x05,0x00,0x62,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x62,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_and_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x62,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_and_b16 v5, v1, 0x3800 +0x05,0x00,0x62,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_and_b16 v5, v1, 0xc400 +0x05,0x00,0x62,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_and_b16 v5, v1, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xfb,0x01,0x00] +0x05,0x00,0x62,0xd7,0x01,0xfb,0x01,0x00 + +# GFX11: v_and_b16 v5, v1, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_and_b16 v5, v1, 0x3456 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x62,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_or_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_or_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x63,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_or_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, s105, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x69,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, ttmp15, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x7b,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, 0x3800, v2 +0x05,0x00,0x63,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, 0xc400, v2 +0x05,0x00,0x63,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, src_scc, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0x04,0x02,0x00] +0x05,0x00,0x63,0xd7,0xfd,0x04,0x02,0x00 + +# GFX11: v_or_b16 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x63,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_or_b16 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x63,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x63,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: 
v_or_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xd3,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xf7,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xf7,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x63,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_or_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x63,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_or_b16 v5, v1, 0x3800 +0x05,0x00,0x63,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_or_b16 v5, v1, 0xc400 +0x05,0x00,0x63,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_or_b16 v5, v1, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xfb,0x01,0x00] +0x05,0x00,0x63,0xd7,0x01,0xfb,0x01,0x00 + +# GFX11: v_or_b16 v5, v1, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_or_b16 v5, v1, 0x3456 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_xor_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +0xff,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 + +# GFX11: v_xor_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0x05,0x02,0x00] +0x05,0x00,0x64,0xd7,0xff,0x05,0x02,0x00 + +# GFX11: v_xor_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x01,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, s105, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x69,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x6a,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x6b,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, ttmp15, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x7b,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x7d,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x7e,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x7f,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x80,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0x80,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, -1, v2 ; encoding: 
[0x05,0x00,0x64,0xd7,0xc1,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0xc1,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, 0x3800, v2 +0x05,0x00,0x64,0xd7,0xf0,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, 0xc400, v2 +0x05,0x00,0x64,0xd7,0xf7,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, src_scc, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0x04,0x02,0x00] +0x05,0x00,0x64,0xd7,0xfd,0x04,0x02,0x00 + +# GFX11: v_xor_b16 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x64,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_xor_b16 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x64,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x64,0xd7,0x01,0xff,0x03,0x00 + +# GFX11: v_xor_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0x05,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xd3,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xd5,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd7,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xd7,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xf7,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xf7,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xfb,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xfb,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xfd,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xfd,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xff,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x01,0x01,0x00] +0x05,0x00,0x64,0xd7,0x01,0x01,0x01,0x00 + +# GFX11: v_xor_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x83,0x01,0x00] +0x05,0x00,0x64,0xd7,0x01,0x83,0x01,0x00 + +# GFX11: v_xor_b16 v5, v1, 0x3800 +0x05,0x00,0x64,0xd7,0x01,0xe1,0x01,0x00 + +# GFX11: v_xor_b16 v5, v1, 0xc400 +0x05,0x00,0x64,0xd7,0x01,0xef,0x01,0x00 + +# GFX11: v_xor_b16 v5, v1, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xfb,0x01,0x00] +0x05,0x00,0x64,0xd7,0x01,0xfb,0x01,0x00 + +# GFX11: v_xor_b16 v5, v1, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xff,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_xor_b16 v5, v1, 0x3456 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] +0x05,0x00,0x64,0xd7,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00 diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll new file mode 100644 index 0000000000000..7ae4bf3f99cd8 --- /dev/null +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -0,0 +1,296 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal 
-attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CGSCC +; + +define internal i8 @read_arg(i8* %p) { +; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@read_arg +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %l = load i8, i8* %p, align 1 + ret i8 %l +} + +define internal i8 @read_arg_index(i8* %p, i64 %index) { +; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@read_arg_index +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly align 16 dereferenceable(1024) [[P:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[G:%.*]] = getelementptr inbounds i8, i8* [[P]], i64 2 +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[G]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %g = getelementptr inbounds i8, i8* %p, i64 %index + %l = load i8, i8* %g, align 1 + ret i8 %l +} + +define i8 @call_simplifiable_1() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1 +; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: ret i8 2 +; +; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_1 +; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i0, align 1 + %r = call i8 @read_arg(i8* %i0) + ret i8 %r +} + +;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. 
+ +define internal i8 @read_arg_1(i8* %p) { +; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@read_arg_1 +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %l = load i8, i8* %p, align 1 + ret i8 %l +} + +define internal i8 @sum_two_same_loads(i8* %p) { +; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; CGSCC-NEXT: ret i8 [[Z]] +; + %x = call i8 @read_arg_1(i8* %p) + %y = call i8 @read_arg_1(i8* %p) + %z = add nsw i8 %x, %y + ret i8 %z +} + +define i8 @call_simplifiable_2() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 +; TUNIT-SAME: () #[[ATTR0]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: ret i8 4 +; +; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_2 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 +; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i0 + %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 3, i8* %i1 + %r = call i8 @sum_two_same_loads(i8* %i0) + ret i8 %r +} + +define i8 @call_simplifiable_3() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3 +; TUNIT-SAME: () #[[ATTR0]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: ret i8 2 +; +; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_3 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x 
i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2 +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 16 dereferenceable(1024) [[I0]]) #[[ATTR4]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 + %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i2, align 1 + %r = call i8 @read_arg_index(i8* %i0, i64 2) + ret i8 %r +} + +;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. + +define internal i8 @read_arg_2(i8* %p) { +; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT-LABEL: define {{[^@]+}}@read_arg_2 +; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; TUNIT-NEXT: ret i8 [[L]] +; +; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@read_arg_2 +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %l = load i8, i8* %p, align 1 + ret i8 %l +} + +define internal i8 @sum_two_different_loads(i8* %p, i8* %q) { +; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads +; TUNIT-SAME: (i8* nocapture nofree nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree nonnull readonly dereferenceable(972) [[P]]) #[[ATTR3:[0-9]+]] +; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR3]] +; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; TUNIT-NEXT: ret i8 [[Z]] +; +; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR4]] +; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; CGSCC-NEXT: ret i8 [[Z]] +; + %x = call i8 @read_arg_2(i8* %p) + %y = call i8 @read_arg_2(i8* %q) + %z = add nsw i8 %x, %y + ret i8 %z +} + +define i8 @call_partially_simplifiable_1() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1 +; TUNIT-SAME: () #[[ATTR0]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: store i8 2, i8* [[I2]], align 2 +; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: store i8 3, i8* [[I3]], align 1 +; 
TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4 +; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]] +; TUNIT-NEXT: ret i8 [[R]] +; +; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_1 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2 +; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; CGSCC-NEXT: store i8 3, i8* [[I3]], align 1 +; CGSCC-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4 +; CGSCC-NEXT: store i8 4, i8* [[I4]], align 4 +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR4]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i2 + %i3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 3, i8* %i3 + %i4 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 4 + ;;; This store is redundant, hence removed. + store i8 4, i8* %i4 + %r = call i8 @sum_two_different_loads(i8* %i2, i8* %i3) + ret i8 %r +} + +define i8 @call_partially_simplifiable_2(i1 %cond) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_2 +; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51 +; TUNIT-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52 +; TUNIT-NEXT: store i8 2, i8* [[I52]], align 4 +; TUNIT-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53 +; TUNIT-NEXT: store i8 3, i8* [[I53]], align 1 +; TUNIT-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54 +; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]] +; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR3]] +; TUNIT-NEXT: ret i8 [[R]] +; +; CGSCC: Function Attrs: nofree nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_2 +; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51 +; CGSCC-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52 +; CGSCC-NEXT: store i8 2, i8* [[I52]], align 4 +; CGSCC-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53 +; 
CGSCC-NEXT: store i8 3, i8* [[I53]], align 1 +; CGSCC-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54 +; CGSCC-NEXT: store i8 4, i8* [[I54]], align 2 +; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR4]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i51 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 51 + %i52 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 52 + store i8 2, i8* %i52 + %i53 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 53 + store i8 3, i8* %i53 + %i54 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 54 + ;;; This store is redundant, hence removed. Not affected by the select. + store i8 4, i8* %i54 + %sel = select i1 %cond, i8* %i51, i8 *%i52 + %r = call i8 @sum_two_different_loads(i8* %sel, i8* %i53) + ret i8 %r +} + +;. +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn } +;. +; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR4]] = { readonly willreturn } +;. 
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll new file mode 100644 index 0000000000000..2e2b2aca2f74d --- /dev/null +++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +%struct.T = type { i32, [10 x [20 x i8]] } + +define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4 +; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 23, i8* %gep23, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29 + store i8 29, i8* %gep29, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + store i8 7, i8* %gep7, align 4 + + ;; This store is redundant, hence removed. 
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31 + store i8 42, i8* %gep31, align 4 + + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %i = load i8, i8* %gep.sel, align 4 + ret i8 %i +} + +define i8 @select_offsets_simplifiable_2(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_2 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4 +; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 20, i64 26 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 4 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 23, i8* %gep23, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29 + store i8 29, i8* %gep29, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + store i8 7, i8* %gep7, align 4 + + ;; This store is redundant, hence removed. 
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31 + store i8 42, i8* %gep31, align 4 + + ;; Adjust the offsets so that they match the stores after adding 3 + %sel0 = select i1 %cnd1, i64 20, i64 26 + %sel1 = select i1 %cnd2, i64 %sel0, i64 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + %i = load i8, i8* %gep.plus, align 4 + ret i8 %i +} + +define i8 @select_offsets_simplifiable_3(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_3 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]] +; CHECK-NEXT: ret i8 100 +; +entry: + %bundle = alloca %struct.T, align 64 + %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 1, i64 1 + store i8 100, i8* %gep.fixed, align 4 + %sel1 = select i1 %cnd1, i64 1, i64 3 + %sel2 = select i1 %cnd2, i64 5, i64 11 + %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2 + store i8 42, i8* %gep.sel, align 4 + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 100, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 100, i8* %gep23, align 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %i = load i8, i8* %gep.sel, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_2(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_2 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32 +; CHECK-NEXT: store i8 100, i8* [[GEP32]], align 16 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x 
i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32 + store i8 100, i8* %gep32, align 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + %i = load i8, i8* %gep.plus, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: store i8 100, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP29]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + store i8 100, i8* %gep.sel, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29 + %i = load i8, i8* %gep29, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: store i8 100, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP32]], align 16 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + store i8 100, i8* %gep.plus, align 4 + %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32 + %i = load i8, i8* %gep32, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_5(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_5 +; 
CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 3, i64 5 +; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]] +; CHECK-NEXT: store i8 42, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %bundle = alloca %struct.T, align 64 + %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 3, i64 5 + store i8 100, i8* %gep.fixed, align 4 + %sel1 = select i1 %cnd1, i64 1, i64 3 + %sel2 = select i1 %cnd2, i64 5, i64 11 + %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2 + + ;; This store prevents the constant 100 from being propagated to ret + store i8 42, i8* %gep.sel, align 4 + + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +define i8 @select_gep_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK-LABEL: define {{[^@]+}}@select_gep_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; CHECK-NEXT: ret i8 21 +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 21, i8* %gep3, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 + store i8 42, i8* %sel.ptr, align 4 + %i = load i8, i8* %gep3, align 4 + ret i8 %i +} + +define i8 @select_gep_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn +; CHECK-LABEL: define {{[^@]+}}@select_gep_not_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 + store i8 42, i8* %sel.ptr, align 4 + %i = load i8, i8* 
%gep7, align 4 + ret i8 %i +} + +; FIXME: This should be simplifiable. See comment inside. + +define i8 @phi_offsets_fixme(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@phi_offsets_fixme +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 16 +; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 3, [[THEN]] ], [ 11, [[ELSE]] ] +; CHECK-NEXT: [[GEP_PHI:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[PHI]] +; CHECK-NEXT: store i8 42, i8* [[GEP_PHI]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 16 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 + store i8 100, i8* %gep.fixed, align 4 + br i1 %cnd1, label %then, label %else + +then: + br label %join + +else: + br label %join + +join: + ; FIXME: AAPotentialConstantValues does not detect the constant values for the + ; PHI below. It needs to rely on AAPotentialValues. + %phi = phi i64 [ 3, %then ], [ 11, %else ] + %gep.phi = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %phi + store i8 42, i8* %gep.phi, align 4 + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +;. +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } +;. 
diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 49c17d61575fc..e2cea9d83813e 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -211,7 +211,8 @@ define i32 @nc1(i32* %q, i32* %p, i1 %b) { ; TUNIT-NEXT: e: ; TUNIT-NEXT: br label [[L:%.*]] ; TUNIT: l: -; TUNIT-NEXT: [[Y:%.*]] = phi i32* [ [[Q]], [[E:%.*]] ] +; TUNIT-NEXT: [[X:%.*]] = phi i32* [ [[P]], [[E:%.*]] ] +; TUNIT-NEXT: [[Y:%.*]] = phi i32* [ [[Q]], [[E]] ] ; TUNIT-NEXT: [[TMP2:%.*]] = select i1 [[B]], i32* [[P]], i32* [[Q]] ; TUNIT-NEXT: [[VAL:%.*]] = load i32, i32* [[TMP2]], align 4 ; TUNIT-NEXT: store i32 0, i32* [[P]], align 4 @@ -224,7 +225,8 @@ define i32 @nc1(i32* %q, i32* %p, i1 %b) { ; CGSCC-NEXT: e: ; CGSCC-NEXT: br label [[L:%.*]] ; CGSCC: l: -; CGSCC-NEXT: [[Y:%.*]] = phi i32* [ [[Q]], [[E:%.*]] ] +; CGSCC-NEXT: [[X:%.*]] = phi i32* [ [[P]], [[E:%.*]] ] +; CGSCC-NEXT: [[Y:%.*]] = phi i32* [ [[Q]], [[E]] ] ; CGSCC-NEXT: [[TMP2:%.*]] = select i1 [[B]], i32* [[P]], i32* [[Q]] ; CGSCC-NEXT: [[VAL:%.*]] = load i32, i32* [[TMP2]], align 4 ; CGSCC-NEXT: store i32 0, i32* [[P]], align 4 diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index 5e65e826f96cc..8ad74f73d9773 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -105,7 +105,7 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ] ; TUNIT-NEXT: br label [[RETURN]] ; TUNIT: return: -; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[R]] ; ; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -140,7 +140,7 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { ; CGSCC-NEXT: [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: -; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; CGSCC-NEXT: ret i32 [[R]] ; entry: @@ -220,7 +220,7 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ] ; TUNIT-NEXT: br label [[RETURN]] ; TUNIT: return: -; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; ; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -256,7 +256,7 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { ; CGSCC-NEXT: [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: -; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; CGSCC-NEXT: ret i32 [[RETVAL_0]] ; entry: @@ -389,7 +389,7 @@ define 
double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; TUNIT-NEXT: [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ] ; TUNIT-NEXT: br label [[RETURN]] ; TUNIT: return: -; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; TUNIT-NEXT: ret double* [[R]] ; ; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -424,7 +424,7 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; CGSCC-NEXT: [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: -; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] +; CGSCC-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[R]], [[COND_END]] ] ; CGSCC-NEXT: ret double* [[R]] ; entry: @@ -800,6 +800,7 @@ define double @select_and_phi(double %b) #0 { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: +; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[B]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret double [[B]] ; entry: @@ -838,6 +839,7 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 { ; TUNIT-NEXT: [[CALL:%.*]] = call double @recursion_select_and_phi(i32 [[DEC]], double [[B]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: +; TUNIT-NEXT: [[PHI:%.*]] = phi double [ [[B]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ] ; TUNIT-NEXT: ret double [[B]] ; ; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -851,6 +853,7 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 { ; CGSCC-NEXT: [[CALL:%.*]] = call double @recursion_select_and_phi(i32 [[DEC]], double [[B]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: +; CGSCC-NEXT: [[PHI:%.*]] = phi double [ [[B]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ] ; CGSCC-NEXT: ret double [[B]] ; entry: diff --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll index 260626f383437..3adb57ec21840 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll @@ -2,10 +2,17 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC +@Gstatic_int1 = internal global i32 zeroinitializer, align 4 +@Gstatic_int2 = internal global i32 zeroinitializer, align 4 + declare void @llvm.assume(i1) declare void @useI1p(i1*) declare void @unknown() +;. +; CHECK: @[[GSTATIC_INT1:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4 +; CHECK: @[[GSTATIC_INT2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4 +;. 
define i1 @readI1p(i1* %p) { ; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; CHECK-LABEL: define {{[^@]+}}@readI1p @@ -40,13 +47,13 @@ define i1 @drop_assume_1c_nr() norecurse { ; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { -; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6:[0-9]+]] ; TUNIT-NEXT: ret i1 true ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { -; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR7:[0-9]+]] ; CGSCC-NEXT: ret i1 true ; %stack = alloca i1 @@ -80,13 +87,14 @@ define i1 @keep_assume_2c_nr() norecurse { } define i1 @keep_assume_3c_nr() norecurse { +; ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_3c_nr ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -96,7 +104,7 @@ define i1 @keep_assume_3c_nr() norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -108,13 +116,14 @@ define i1 @keep_assume_3c_nr() norecurse { ret i1 %l } define i1 @keep_assume_4c_nr() norecurse { +; ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_4c_nr ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L4]] ; @@ -124,7 +133,7 @@ define i1 @keep_assume_4c_nr() norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L4]] ; @@ -161,7 +170,7 @@ define i1 @drop_assume_1_nr(i1 %arg) norecurse { ; TUNIT-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR6]] ; 
TUNIT-NEXT: ret i1 [[ARG]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -169,7 +178,7 @@ define i1 @drop_assume_1_nr(i1 %arg) norecurse { ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR7]] ; CGSCC-NEXT: ret i1 [[ARG]] ; %stack = alloca i1 @@ -203,13 +212,14 @@ define i1 @keep_assume_2_nr(i1 %arg) norecurse { } define i1 @keep_assume_3_nr(i1 %arg) norecurse { +; ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_3_nr ; TUNIT-SAME: (i1 [[ARG:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -219,7 +229,7 @@ define i1 @keep_assume_3_nr(i1 %arg) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -232,13 +242,14 @@ define i1 @keep_assume_3_nr(i1 %arg) norecurse { } define i1 @keep_assume_4_nr(i1 %arg) norecurse { +; ; TUNIT: Function Attrs: norecurse ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_4_nr ; TUNIT-SAME: (i1 [[ARG:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -248,7 +259,7 @@ define i1 @keep_assume_4_nr(i1 %arg) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -266,7 +277,7 @@ define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: br label [[M:%.*]] @@ -280,7 +291,7 @@ define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 
[[ARG]], i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: br label [[M:%.*]] @@ -311,7 +322,7 @@ define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: br label [[M:%.*]] @@ -326,7 +337,7 @@ define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: br label [[M:%.*]] @@ -365,7 +376,7 @@ define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: ret i1 [[L]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -382,7 +393,7 @@ define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: ret i1 [[L]] ; %stack = alloca i1 @@ -415,7 +426,7 @@ define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -432,7 +443,7 @@ define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: ret void ; %stack = alloca i1 @@ -457,7 +468,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -466,7 +477,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull 
readonly dereferenceable(1) [[STACK]]) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7:[0-9]+]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -475,7 +486,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -484,7 +495,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8:[0-9]+]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -518,8 +529,8 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -536,8 +547,8 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -563,22 +574,22 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M:%.*]] ; TUNIT: f: ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void 
@llvm.assume(i1 noundef [[L3]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -587,22 +598,22 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M:%.*]] ; CGSCC: f: ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -634,22 +645,22 @@ define i1 @assume_5c_nr(i1 %cond) norecurse { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M:%.*]] ; TUNIT: f: ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly 
dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -658,22 +669,22 @@ define i1 @assume_5c_nr(i1 %cond) norecurse { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M:%.*]] ; CGSCC: f: ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -720,13 +731,13 @@ define i1 @drop_assume_1c() { ; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c ; TUNIT-SAME: () #[[ATTR3]] { -; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR6]] ; TUNIT-NEXT: ret i1 true ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR7]] ; CGSCC-NEXT: ret i1 true ; %stack = alloca i1 @@ -758,11 +769,12 @@ define i1 @keep_assume_2c() { } define i1 @keep_assume_3c() { +; ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_3c() { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -770,7 +782,7 @@ define i1 @keep_assume_3c() { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* 
noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -782,11 +794,12 @@ define i1 @keep_assume_3c() { ret i1 %l } define i1 @keep_assume_4c() { +; ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_4c() { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L4]] ; @@ -794,7 +807,7 @@ define i1 @keep_assume_4c() { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L4]] ; @@ -830,7 +843,7 @@ define i1 @drop_assume_1(i1 %arg) { ; TUNIT-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR6]] ; TUNIT-NEXT: ret i1 [[ARG]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -838,7 +851,7 @@ define i1 @drop_assume_1(i1 %arg) { ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR7]] ; CGSCC-NEXT: ret i1 [[ARG]] ; %stack = alloca i1 @@ -871,12 +884,13 @@ define i1 @keep_assume_2(i1 %arg) { } define i1 @keep_assume_3(i1 %arg) { +; ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_3 ; TUNIT-SAME: (i1 [[ARG:%.*]]) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -885,7 +899,7 @@ define i1 @keep_assume_3(i1 %arg) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -898,12 +912,13 @@ define i1 @keep_assume_3(i1 %arg) { } define i1 @keep_assume_4(i1 %arg) { +; ; TUNIT-LABEL: define {{[^@]+}}@keep_assume_4 ; TUNIT-SAME: (i1 [[ARG:%.*]]) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: call void @useI1p(i1* 
noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; TUNIT-NEXT: ret i1 [[L]] ; @@ -912,7 +927,7 @@ define i1 @keep_assume_4(i1 %arg) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: call void @useI1p(i1* noalias nocapture noundef nonnull dereferenceable(1) [[STACK]]) ; CGSCC-NEXT: ret i1 [[L]] ; @@ -930,7 +945,7 @@ define i1 @assume_1(i1 %arg, i1 %cond) { ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: br label [[M:%.*]] @@ -944,7 +959,7 @@ define i1 @assume_1(i1 %arg, i1 %cond) { ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: br label [[M:%.*]] @@ -975,7 +990,7 @@ define void @assume_1b(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: br label [[M:%.*]] @@ -990,7 +1005,7 @@ define void @assume_1b(i1 %arg, i1 %cond) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: br label [[M:%.*]] @@ -1029,7 +1044,7 @@ define i1 @assume_2(i1 %arg, i1 %cond) { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: ret i1 [[L]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -1046,7 +1061,7 @@ define i1 @assume_2(i1 %arg, i1 %cond) { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: ret i1 [[L]] ; %stack = alloca i1 @@ -1079,7 +1094,7 @@ define void @assume_2b(i1 %arg, i1 %cond) { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; 
; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn @@ -1096,7 +1111,7 @@ define void @assume_2b(i1 %arg, i1 %cond) { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: ret void ; %stack = alloca i1 @@ -1121,7 +1136,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1130,7 +1145,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn @@ -1139,7 +1154,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 @@ -1148,7 +1163,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1182,8 +1197,8 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn @@ -1200,8 +1215,8 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR7]] +; 
CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1227,22 +1242,22 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M:%.*]] ; TUNIT: f: ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn @@ -1251,22 +1266,22 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 [[ARG]], i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M:%.*]] ; CGSCC: f: ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1298,22 +1313,22 @@ define i1 @assume_5c(i1 %cond) { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; 
TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]] ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: ; TUNIT-NEXT: store i1 true, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M:%.*]] ; TUNIT: f: ; TUNIT-NEXT: store i1 false, i1* [[STACK]], align 1 ; TUNIT-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR4]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR6]] ; TUNIT-NEXT: br label [[M]] ; TUNIT: m: ; TUNIT-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR4]] -; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR6]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR7]] ; TUNIT-NEXT: ret i1 [[R]] ; ; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn @@ -1322,22 +1337,22 @@ define i1 @assume_5c(i1 %cond) { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L1:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR7]] ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: ; CGSCC-NEXT: store i1 true, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L2:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L2]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M:%.*]] ; CGSCC: f: ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: [[L3:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR5]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L3]]) #[[ATTR7]] ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 -; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR7]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR8]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1362,19 +1377,116 @@ m: ret i1 %r } +define i32 @assume_read_global_good() { +; +; +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@assume_read_global_good +; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { +; TUNIT-NEXT: [[LGS1:%.*]] = load i32, i32* @Gstatic_int1, align 4 +; TUNIT-NEXT: [[C:%.*]] = icmp eq i32 [[LGS1]], 42 +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[C]]) #[[ATTR6]] +; TUNIT-NEXT: store i32 13, i32* @Gstatic_int1, align 4 +; TUNIT-NEXT: store i32 17, i32* @Gstatic_int1, align 4 +; TUNIT-NEXT: [[LGS3:%.*]] = load i32, i32* @Gstatic_int1, align 4 +; TUNIT-NEXT: [[ADD:%.*]] = add 
i32 42, [[LGS3]] +; TUNIT-NEXT: ret i32 [[ADD]] +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@assume_read_global_good +; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { +; CGSCC-NEXT: [[LGS1:%.*]] = load i32, i32* @Gstatic_int1, align 4 +; CGSCC-NEXT: [[C:%.*]] = icmp eq i32 [[LGS1]], 42 +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[C]]) #[[ATTR7]] +; CGSCC-NEXT: store i32 13, i32* @Gstatic_int1, align 4 +; CGSCC-NEXT: store i32 17, i32* @Gstatic_int1, align 4 +; CGSCC-NEXT: [[LGS3:%.*]] = load i32, i32* @Gstatic_int1, align 4 +; CGSCC-NEXT: [[ADD:%.*]] = add i32 42, [[LGS3]] +; CGSCC-NEXT: ret i32 [[ADD]] +; + %lgs1 = load i32, i32* @Gstatic_int1 + %c = icmp eq i32 %lgs1, 42 + call void @llvm.assume(i1 %c) + %lgs2 = load i32, i32* @Gstatic_int1 + store i32 13, i32* @Gstatic_int1, align 4 + store i32 17, i32* @Gstatic_int1, align 4 + %lgs3 = load i32, i32* @Gstatic_int1 + %add = add i32 %lgs2, %lgs3 + ret i32 %add +} + +; TODO: Technically we could still utilize the assumption if we employ AA. +define i32 @assume_read_global_bad(i32* %p) { +; +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@assume_read_global_bad +; TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: [[LGS1:%.*]] = load i32, i32* @Gstatic_int2, align 4 +; TUNIT-NEXT: [[C:%.*]] = icmp eq i32 [[LGS1]], 42 +; TUNIT-NEXT: store i32 13, i32* [[P]], align 4 +; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[C]]) #[[ATTR6]] +; TUNIT-NEXT: [[LGS2:%.*]] = load i32, i32* @Gstatic_int2, align 4 +; TUNIT-NEXT: store i32 17, i32* @Gstatic_int2, align 4 +; TUNIT-NEXT: ret i32 [[LGS2]] +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@assume_read_global_bad +; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: [[LGS1:%.*]] = load i32, i32* @Gstatic_int2, align 4 +; CGSCC-NEXT: [[C:%.*]] = icmp eq i32 [[LGS1]], 42 +; CGSCC-NEXT: store i32 13, i32* [[P]], align 4 +; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[C]]) #[[ATTR7]] +; CGSCC-NEXT: [[LGS2:%.*]] = load i32, i32* @Gstatic_int2, align 4 +; CGSCC-NEXT: store i32 17, i32* @Gstatic_int2, align 4 +; CGSCC-NEXT: ret i32 [[LGS2]] +; + %lgs1 = load i32, i32* @Gstatic_int2 + %c = icmp eq i32 %lgs1, 42 + store i32 13, i32* %p, align 4 + call void @llvm.assume(i1 %c) + %lgs2 = load i32, i32* @Gstatic_int2 + store i32 17, i32* @Gstatic_int2, align 4 + ret i32 %lgs2 +} + +define void @assume_write_globals() { +; +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT-LABEL: define {{[^@]+}}@assume_write_globals +; TUNIT-SAME: () #[[ATTR5:[0-9]+]] { +; TUNIT-NEXT: store i32 42, i32* @Gstatic_int1, align 4 +; TUNIT-NEXT: store i32 42, i32* @Gstatic_int2, align 4 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC-LABEL: define {{[^@]+}}@assume_write_globals +; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { +; CGSCC-NEXT: store i32 42, i32* @Gstatic_int1, align 4 +; CGSCC-NEXT: store i32 42, i32* @Gstatic_int2, align 4 +; CGSCC-NEXT: ret void +; + store i32 42, i32* @Gstatic_int1, align 4 + store i32 42, i32* @Gstatic_int2, align 4 + ret void +} + ;. 
; TUNIT: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; TUNIT: attributes #[[ATTR2]] = { norecurse } ; TUNIT: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR6]] = { willreturn } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind readonly willreturn } ;. ; CGSCC: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; CGSCC: attributes #[[ATTR2]] = { norecurse } ; CGSCC: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR4]] = { inaccessiblememonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { willreturn } -; CGSCC: attributes #[[ATTR6]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR7]] = { willreturn } +; CGSCC: attributes #[[ATTR8]] = { readonly willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index cd992da4c9f98..e97dd4a8e5d05 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -2223,6 +2223,7 @@ define i8 @phi_no_store_2() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2241,6 +2242,7 @@ define i8 @phi_no_store_2() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2281,6 +2283,7 @@ define i8 @phi_no_store_3() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2302,6 +2305,7 @@ define i8 @phi_no_store_3() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ 
[[O:%.*]], [[LOOP]] ] +; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -3103,6 +3107,56 @@ define void @scope_value_traversal_helper(i32* %a, i1 %c) { ret void } +define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: ret i8 100 +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %offset = add i64 5, 7 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 + %gep.sum = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset + store i8 100, i8* %gep.fixed, align 4 + %i = load i8, i8* %gep.sum, align 4 + ret i8 %i +} + +; FIXME: This should be simplifiable. See comment inside. + +define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: store i8 100, i8* [[GEP_LOADED]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %addr = alloca i64, align 16 + %gep.addr = getelementptr inbounds i64, i64* %addr, i64 0 + store i64 12, i64* %gep.addr, align 8 + %offset = load i64, i64* %gep.addr, align 8 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 + + ; FIXME: AAPotentialConstantValues does not detect the constant offset being + ; passed to this GEP. It needs to rely on AAPotentialValues. 
+ %gep.loaded = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset + store i8 100, i8* %gep.loaded, align 4 + + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 7c79c4e3d059c..479d6657a5328 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -188,12 +188,11 @@ define void @test-select-phi(i1 %c) { ; CHECK-NEXT: [[PHI_NOT_SAME:%.*]] = phi i32 [ 0, [[IF_TRUE]] ], [ 1, [[IF_FALSE]] ] ; CHECK-NEXT: [[PHI_SAME_PROP:%.*]] = phi i32 [ 1, [[IF_TRUE]] ], [ 1, [[IF_FALSE]] ] ; CHECK-NEXT: [[PHI_SAME_UNDEF:%.*]] = phi i32 [ 1, [[IF_TRUE]] ], [ undef, [[IF_FALSE]] ] -; CHECK-NEXT: [[SELECT_NOT_SAME_UNDEF:%.*]] = select i1 [[C]], i32 [[PHI_NOT_SAME]], i32 undef ; CHECK-NEXT: tail call void @use(i32 noundef 1) ; CHECK-NEXT: tail call void @use(i32 noundef [[PHI_NOT_SAME]]) ; CHECK-NEXT: tail call void @use(i32 noundef 1) ; CHECK-NEXT: tail call void @use(i32 1) -; CHECK-NEXT: tail call void @use(i32 [[SELECT_NOT_SAME_UNDEF]]) +; CHECK-NEXT: tail call void @use(i32 [[PHI_NOT_SAME]]) ; CHECK-NEXT: ret void ; %select-same = select i1 %c, i32 1, i32 1 diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index c448587423b19..7b7d1d7aa419f 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC target datalayout = "e-m:e-i54:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll index 60cb7ad4bfd95..0e8d12ba12aae 100644 --- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll @@ -1,19 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='require,instcombine' -opaque-pointers -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s ; Constant-indexed GEP instructions in a chain of GEP instructions should be ; swapped to the end whenever such transformation is valid. This allows them to ; be merged. +declare void @use(i1) + ; The constant-indexed GEP instruction should be swapped to the end, even ; without merging. 
-; result = (((i32*) p + a) + b) + 1 +; result = (((ptr) p + a) + b) + 1 define ptr @basic(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @basic( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]] ; CHECK-NEXT: ret ptr [[TMP3]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 @@ -25,34 +27,33 @@ define ptr @basic(ptr %p, i64 %a, i64 %b) { ; GEP with the last index being a constant should also be swapped. define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1 - %2 = getelementptr inbounds i32, ptr %1, i64 %b + %2 = getelementptr inbounds i32, ptr %p, i64 %b ret ptr %2 } ; Negative test. GEP should not be swapped if the last index is not a constant. define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @partialConstant2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP1]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a - %2 = getelementptr inbounds i32, ptr %1, i64 %b + %2 = getelementptr inbounds i32, ptr %p, i64 %b ret ptr %2 } -; Constant-indexed GEP are merged after swapping. -; result = ((i32*) p + a) + 3 +; Constant-indexed GEP are merged after swawpping. +; result = ((ptr) p + a) + 3 define ptr @merge(ptr %p, i64 %a) { ; CHECK-LABEL: @merge( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 3 -; CHECK-NEXT: ret ptr [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 +; CHECK-NEXT: ret ptr [[TMP3]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %a @@ -63,14 +64,16 @@ define ptr @merge(ptr %p, i64 %a) { ; Multiple constant-indexed GEP. Note that the first two cannot be merged at ; first, but after the second and third are merged, the result can be merged ; with the first one on the next pass. 
-; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9 +; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9 define ptr @nested(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @nested( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <3 x i32>, ptr [[TMP3]], i64 10 -; CHECK-NEXT: ret ptr [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1 +; CHECK-NEXT: ret ptr [[TMP6]] ; %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1 %2 = getelementptr inbounds i8, ptr %1, i64 %a @@ -84,9 +87,9 @@ define ptr @nested(ptr %p, i64 %a, i64 %b) { ; It is valid to swap if the source operand of the first GEP has multiple uses. define ptr @multipleUses1(ptr %p) { ; CHECK-LABEL: @multipleUses1( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] ; CHECK-NEXT: ret ptr [[TMP3]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 @@ -95,10 +98,24 @@ define ptr @multipleUses1(ptr %p) { ret ptr %3 } -; Negative test. It is not valid to swap if the first GEP has multiple uses. -define ptr @multipleUses2(ptr %p) { +; It is valid to swap if the second GEP has multiple uses. +define ptr @multipleUses2(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses2( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]]) +; CHECK-NEXT: ret ptr [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr %p, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %a + call void @use(ptr %2) + ret ptr %2 +} + +; Negative test. It is not valid to swap if the first GEP has multiple uses. +define ptr @multipleUses3(ptr %p) { +; CHECK-LABEL: @multipleUses3( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] ; CHECK-NEXT: ret ptr [[TMP3]] @@ -108,40 +125,3 @@ define ptr @multipleUses2(ptr %p) { %3 = getelementptr inbounds i32, ptr %1, i64 %2 ret ptr %3 } - -; Negative test. LICM should take priority over canonicalization, so the first -; GEP should not be swapped, even if it contains a constant index. 
-define i64 @licm(ptr %p) { -; CHECK-LABEL: @licm( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[P2:%.*]] = getelementptr i64, ptr [[P1]], i64 [[I]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P2]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[LOAD]] -; CHECK-NEXT: [[INEXT]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret i64 [[ADD]] -; -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] - %p1 = getelementptr i64, ptr %p, i64 4 - %p2 = getelementptr i64, ptr %p1, i64 %i - %load = load i64, ptr %p2 - %add = add nsw i64 %sum, %load - %inext = add nuw nsw i64 %i, 1 - %exitcond = icmp eq i64 %i, 1000000 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret i64 %add -} diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll index 4a0aade98b2d2..a7c5ff9b6ce8c 100644 --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -9,7 +9,7 @@ target datalayout = "i24:8:8" %struct.B = type { i8, [3 x i16], %struct.A, float } %struct.C = type { i8, i32, i32 } -; result = (i32*) p + 3 +; result = (ptr) p + 3 define ptr @mergeBasic(ptr %p) { ; CHECK-LABEL: @mergeBasic( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3 @@ -20,8 +20,8 @@ define ptr @mergeBasic(ptr %p) { ret ptr %2 } -; Converted to i8* and merged. -; result = (i8*) p + 10 +; Converted to ptr and merged. +; result = (ptr) p + 10 define ptr @mergeDifferentTypes(ptr %p) { ; CHECK-LABEL: @mergeDifferentTypes( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10 @@ -32,8 +32,8 @@ define ptr @mergeDifferentTypes(ptr %p) { ret ptr %2 } -; Converted to i8* and merged. -; result = (i8*) p + 10 +; Converted to ptr and merged. +; result = (ptr) p + 10 define ptr @mergeReverse(ptr %p) { ; CHECK-LABEL: @mergeReverse( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10 @@ -55,7 +55,7 @@ define ptr @zeroSum(ptr %p) { ret ptr %2 } -; result = (i8*) (([20 x i8]*) p + 1) + 17 +; result = (ptr) ((ptr) p + 1) + 17 define ptr @array1(ptr %p) { ; CHECK-LABEL: @array1( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17 @@ -66,8 +66,8 @@ define ptr @array1(ptr %p) { ret ptr %2 } -; Converted to i8* and merged. -; result = (i8*) p + 20 +; Converted to ptr and merged. +; result = (ptr) p + 20 define ptr @array2(ptr %p) { ; CHECK-LABEL: @array2( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20 @@ -78,8 +78,8 @@ define ptr @array2(ptr %p) { ret ptr %2 } -; Converted to i8* and merged. -; result = (i8*) p + 36 +; Converted to ptr and merged. 
+; result = (ptr) p + 36 define ptr @struct1(ptr %p) { ; CHECK-LABEL: @struct1( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36 @@ -101,7 +101,7 @@ define ptr @struct2(ptr %p) { ret ptr %2 } -; result = (i8*) &((struct.B) p)[0].member2.member0 + 7 +; result = (ptr) &((struct.B) p)[0].member2.member0 + 7 define ptr @structStruct(ptr %p) { ; CHECK-LABEL: @structStruct( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7 @@ -115,7 +115,7 @@ define ptr @structStruct(ptr %p) { ; First GEP offset is not divisible by last GEP's source element size, but first ; GEP points to an array such that the last GEP offset is divisible by the ; array's element size, so the first GEP can be rewritten with an extra index. -; result = (i16*) &((struct.B*) p)[i].member1 + 2 +; result = (ptr) &((struct.B*) p)[i].member1 + 2 define ptr @appendIndex(ptr %p, i64 %i) { ; CHECK-LABEL: @appendIndex( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 1, i64 2 @@ -126,23 +126,10 @@ define ptr @appendIndex(ptr %p, i64 %i) { ret ptr %2 } -; After canonicalizing, the second GEP is moved to the front, and then merged -; with the first one with rewritten indices. -; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2 -define ptr @appendIndexReverse(ptr %p, i64 %i) { -; CHECK-LABEL: @appendIndexReverse( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 2, i32 0, i64 2 -; CHECK-NEXT: ret ptr [[TMP1]] -; - %1 = getelementptr inbounds i64, ptr %p, i64 1 - %2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1 - ret ptr %2 -} - -; Offset of either GEP is not divisible by the other's size, converted to i8* +; Offset of either GEP is not divisible by the other's size, converted to ptr ; and merged. ; Here i24 is 8-bit aligned. -; result = (i8*) p + 7 +; result = (ptr) p + 7 define ptr @notDivisible(ptr %p) { ; CHECK-LABEL: @notDivisible( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7 @@ -157,8 +144,8 @@ define ptr @notDivisible(ptr %p) { ; or divisible by the other's size. 
define ptr @partialConstant2(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstant2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll new file mode 100644 index 0000000000000..25178f64ef0d8 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll @@ -0,0 +1,284 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s + +target datalayout="p5:32:32-A5" + +@g1 = constant [32 x i8] zeroinitializer +@g2 = addrspace(1) constant [32 x i8] zeroinitializer + +define i8 @remove_alloca_use_arg(i1 %cond) { +; CHECK-LABEL: @remove_alloca_use_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[IF]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i8 @volatile_load_keep_alloca(i1 %cond) { +; CHECK-LABEL: @volatile_load_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1) +; CHECK-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 4 dereferenceable(256) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(256) @g1, i64 256, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, 
i32 2 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, ptr addrspace(1) %ptr + ret i8 %load +} + + +define i8 @no_memcpy_keep_alloca(i1 %cond) { +; CHECK-LABEL: @no_memcpy_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i8 @loop_phi_remove_alloca(i1 %cond) { +; CHECK-LABEL: @loop_phi_remove_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB_0:%.*]] +; CHECK: bb.0: +; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br label [[BB_0]] +; CHECK: exit: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %bb.0 + +bb.0: + %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ] + br i1 %cond, label %bb.1, label %exit + +bb.1: + %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %bb.0 + +exit: + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i32 @remove_alloca_ptr_arg(i1 %c, ptr %ptr) { +; CHECK-LABEL: @remove_alloca_ptr_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ @g1, [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %alloca = alloca [32 x i8] + call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) + br i1 %c, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %alloca, %if ], [ %ptr, %entry ] + %v = load i32, ptr %phi + ret i32 %v +} + +define i8 @loop_phi_late_memtransfer_remove_alloca(i1 %cond) { +; CHECK-LABEL: @loop_phi_late_memtransfer_remove_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB_0:%.*]] +; CHECK: bb.0: +; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr 
@g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br label [[BB_0]] +; CHECK: exit: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %bb.0 + +bb.0: + %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ] + br i1 %cond, label %bb.1, label %exit + +bb.1: + %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br label %bb.0 + +exit: + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) { +; CHECK-LABEL: @test_memcpy_after_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a = alloca [32 x i8] + br i1 %cond, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %a, %if ], [ %ptr, %entry ] + call void @llvm.memcpy.p0.p0.i64(ptr %phi, ptr @g1, i64 32, i1 false) + %v = load i32, ptr %phi + ret i32 %v +} + +define i32 @addrspace_diff_keep_alloca(i1 %cond, ptr %x) { +; CHECK-LABEL: @addrspace_diff_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a = alloca [32 x i8] + call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false) + br i1 %cond, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %a, %if ], [ %x, %entry ] + %v = load i32, ptr %phi + ret i32 %v +} + +define i32 @phi_loop(i1 %c) { +; CHECK-LABEL: @phi_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ @g1, [[ENTRY:%.*]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_NEXT]] = getelementptr i8, ptr [[PTR]], i64 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %alloca = alloca [32 x i8] + call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) + br label %loop + +loop: + %ptr = phi ptr [ %alloca, %entry ], [ %ptr.next, %loop ] + %ptr.next = getelementptr i8, ptr %ptr, i64 4 + br i1 %c, label %exit, label %loop + +exit: + %v = load i32, ptr %ptr + ret i32 
%v +} + +declare void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1), ptr, i64, i1) +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) +declare void @llvm.memcpy.p0.p1.i64(ptr, ptr addrspace(1), i64, i1) diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 0a97efb225a61..eb5e37bad3844 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1743,10 +1743,10 @@ define void @ashr_out_of_range(ptr %A) { define void @ashr_out_of_range_1(ptr %A) { ; CHECK-LABEL: @ashr_out_of_range_1( ; CHECK-NEXT: [[L:%.*]] = load i177, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 -1 ; CHECK-NEXT: [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64 -; CHECK-NEXT: [[G111:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G111]], i64 -1 +; CHECK-NEXT: [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP1]] ; CHECK-NEXT: store i177 0, ptr [[G62]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index b02fd162fdeee..94ccfec39e22e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -38,7 +38,8 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> @@ -49,7 +50,8 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3 ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison) diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index d509f01611043..aa0a253a9a113 100644 --- 
a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -762,9 +762,11 @@ define void @mixed_load3_store3(i32* nocapture %A) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll index 127139d24fac4..3975a0dd33748 100644 --- a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll +++ b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll @@ -11,14 +11,14 @@ define i16 @helper(i16 %0, i64 %x) { ; CHECK-NEXT: start: ; CHECK-NEXT: [[DATA:%.*]] = alloca [2 x i8], align 2 ; CHECK-NEXT: store i16 [[TMP0:%.*]], ptr [[DATA]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1 ; CHECK-NEXT: br label [[BB6_I_I:%.*]] ; CHECK: bb6.i.i: ; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ] ; CHECK-NEXT: [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]] ; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1 ; CHECK-NEXT: [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]] -; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP1:%.*]], i64 1 +; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]] ; CHECK-NEXT: [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1 ; CHECK-NEXT: [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1 ; CHECK-NEXT: store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1 diff --git a/llvm/test/Transforms/StructurizeCFG/structurizecfg-debug-loc.ll b/llvm/test/Transforms/StructurizeCFG/structurizecfg-debug-loc.ll new file mode 100644 index 0000000000000..e57464ad3b321 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/structurizecfg-debug-loc.ll @@ -0,0 +1,202 @@ +; RUN: opt -S -o - -structurizecfg %s | FileCheck %s + +define void @if_then_else(i32 addrspace(1)* %out, i1 %arg) !dbg !7 { +; CHECK: @if_then_else( +; CHECK: entry: +; CHECK: br i1 {{.*}}, label %if.else, label %Flow, !dbg [[ITE_ENTRY_DL:![0-9]+]] +; CHECK: Flow: +; CHECK: br i1 {{.*}}, label %if.then, label %exit, !dbg [[ITE_ENTRY_DL]] +; CHECK: if.then: +; CHECK: br label %exit, !dbg [[ITE_IFTHEN_DL:![0-9]+]] +; CHECK: if.else: +; CHECK: br label %Flow, !dbg [[ITE_IFELSE_DL:![0-9]+]] +; CHECK: exit: +; 
+entry: + br i1 %arg, label %if.then, label %if.else, !dbg !8 + +if.then: + store i32 0, i32 addrspace(1)* %out, !dbg !9 + br label %exit, !dbg !10 + +if.else: + store i32 1, i32 addrspace(1)* %out, !dbg !11 + br label %exit, !dbg !12 + +exit: + ret void, !dbg !13 +} + +define void @while_loop(i32 addrspace(1)* %out) !dbg !14 { +; CHECK: @while_loop( +; CHECK: entry: +; CHECK: br label %while.header, !dbg [[WHILE_ENTRY_DL:![0-9]+]] +; CHECK: while.header: +; CHECK: br i1 {{.*}}, label %while.body, label %Flow, !dbg [[WHILE_HEADER_DL:![0-9]+]] +; CHECK: while.body: +; CHECK: br label %Flow, !dbg [[WHILE_BODY_DL:![0-9]+]] +; CHECK: Flow: +; CHECK: br i1 {{.*}}, label %exit, label %while.header, !dbg [[WHILE_HEADER_DL]] +; CHECK: exit: +; +entry: + br label %while.header, !dbg !15 + +while.header: + %cond = call i1 @loop_condition(), !dbg !16 + br i1 %cond, label %while.body, label %exit, !dbg !17 + +while.body: + store i32 1, i32 addrspace(1)* %out, !dbg !18 + br label %while.header, !dbg !19 + +exit: + ret void, !dbg !20 +} + +define void @while_multiple_exits(i32 addrspace(1)* %out) !dbg !21 { +; CHECK: @while_multiple_exits( +; CHECK: entry: +; CHECK: br label %while.header, !dbg [[WHILEME_ENTRY_DL:![0-9]+]] +; CHECK: while.header: +; CHECK: br i1 {{.*}}, label %while.exiting, label %Flow, !dbg [[WHILEME_HEADER_DL:![0-9]+]] +; CHECK: while.exiting: +; CHECK: br label %Flow, !dbg [[WHILEME_EXITING_DL:![0-9]+]] +; CHECK: Flow: +; CHECK: br i1 {{.*}}, label %exit, label %while.header, !dbg [[WHILEME_HEADER_DL]] +; CHECK: exit: +; +entry: + br label %while.header, !dbg !22 + +while.header: + %cond0 = call i1 @loop_condition(), !dbg !23 + br i1 %cond0, label %while.exiting, label %exit, !dbg !24 + +while.exiting: + %cond1 = call i1 @loop_condition(), !dbg !25 + br i1 %cond1, label %while.header, label %exit, !dbg !26 + +exit: + ret void, !dbg !27 +} + +define void @nested_if_then_else(i32 addrspace(1)* %out, i1 %a, i1 %b) !dbg !28 { +; CHECK: @nested_if_then_else( +; CHECK: entry: +; CHECK: br i1 {{.*}}, label %if.else, label %Flow4, !dbg [[NESTED_ENTRY_DL:![0-9]+]] +; CHECK: Flow4: +; CHECK: br i1 {{.*}}, label %if.then, label %exit, !dbg [[NESTED_ENTRY_DL]] +; CHECK: if.then: +; CHECK: br i1 {{.*}}, label %if.then.else, label %Flow2, !dbg [[NESTED_IFTHEN_DL:![0-9]+]] +; CHECK: Flow2: +; CHECK: br i1 {{.*}}, label %if.then.then, label %Flow3, !dbg [[NESTED_IFTHEN_DL]] +; CHECK: if.then.then: +; CHECK: br label %Flow3, !dbg [[NESTED_IFTHENTHEN_DL:![0-9]+]] +; CHECK: if.then.else: +; CHECK: br label %Flow2, !dbg [[NESTED_IFTHENELSE_DL:![0-9]+]] +; CHECK: if.else: +; CHECK: br i1 {{.*}}, label %if.else.else, label %Flow, !dbg [[NESTED_IFELSE_DL:![0-9]+]] +; CHECK: Flow: +; CHECK: br i1 {{.*}}, label %if.else.then, label %Flow1, !dbg [[NESTED_IFELSE_DL]] +; CHECK: if.else.then: +; CHECK: br label %Flow1, !dbg [[NESTED_IFELSETHEN_DL:![0-9]+]] +; CHECK: if.else.else: +; CHECK: br label %Flow, !dbg [[NESTED_IFELSEELSE_DL:![0-9]+]] +; CHECK: Flow1: +; CHECK: br label %Flow4, !dbg [[NESTED_IFELSE_DL]] +; CHECK: Flow3: +; CHECK: br label %exit, !dbg [[NESTED_IFTHEN_DL]] +; CHECK: exit: +; +entry: + br i1 %a, label %if.then, label %if.else, !dbg !29 + +if.then: + br i1 %b, label %if.then.then, label %if.then.else, !dbg !30 + +if.then.then: + store i32 0, i32 addrspace(1)* %out, !dbg !31 + br label %exit, !dbg !32 + +if.then.else: + store i32 1, i32 addrspace(1)* %out, !dbg !33 + br label %exit, !dbg !34 + +if.else: + br i1 %b, label %if.else.then, label %if.else.else, !dbg !35 + +if.else.then: + store 
i32 2, i32 addrspace(1)* %out, !dbg !36 + br label %exit, !dbg !37 + +if.else.else: + store i32 3, i32 addrspace(1)* %out, !dbg !38 + br label %exit, !dbg !39 + +exit: + ret void, !dbg !40 +} + +declare i1 @loop_condition() + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!4, !5} + +; CHECK: [[ITE_ENTRY_DL]] = !DILocation(line: 2 +; CHECK: [[ITE_IFTHEN_DL]] = !DILocation(line: 4 +; CHECK: [[ITE_IFELSE_DL]] = !DILocation(line: 6 +; CHECK: [[WHILE_ENTRY_DL]] = !DILocation(line: 2 +; CHECK: [[WHILE_HEADER_DL]] = !DILocation(line: 4 +; CHECK: [[WHILE_BODY_DL]] = !DILocation(line: 6 +; CHECK: [[WHILEME_ENTRY_DL]] = !DILocation(line: 2 +; CHECK: [[WHILEME_HEADER_DL]] = !DILocation(line: 4 +; CHECK: [[WHILEME_EXITING_DL]] = !DILocation(line: 6 +; CHECK: [[NESTED_ENTRY_DL]] = !DILocation(line: 2 +; CHECK: [[NESTED_IFTHEN_DL]] = !DILocation(line: 3 +; CHECK: [[NESTED_IFTHENTHEN_DL]] = !DILocation(line: 5 +; CHECK: [[NESTED_IFTHENELSE_DL]] = !DILocation(line: 7 +; CHECK: [[NESTED_IFELSE_DL]] = !DILocation(line: 8 +; CHECK: [[NESTED_IFELSETHEN_DL]] = !DILocation(line: 10 +; CHECK: [[NESTED_IFELSEELSE_DL]] = !DILocation(line: 12 + +!0 = !{} +!1 = !DIFile(filename: "dummy.ll", directory: "/some/random/directory") +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !0) +!4 = !{i32 2, !"Dwarf Version", i32 5} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = !DISubroutineType(types: !0) +!7 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !2, retainedNodes: !0) +!8 = !DILocation(line: 2, scope: !7) +!9 = !DILocation(line: 3, scope: !7) +!10 = !DILocation(line: 4, scope: !7) +!11 = !DILocation(line: 5, scope: !7) +!12 = !DILocation(line: 6, scope: !7) +!13 = !DILocation(line: 7, scope: !7) +!14 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !2, retainedNodes: !0) +!15 = !DILocation(line: 2, scope: !14) +!16 = !DILocation(line: 3, scope: !14) +!17 = !DILocation(line: 4, scope: !14) +!18 = !DILocation(line: 5, scope: !14) +!19 = !DILocation(line: 6, scope: !14) +!20 = !DILocation(line: 7, scope: !14) +!21 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !2, retainedNodes: !0) +!22 = !DILocation(line: 2, scope: !21) +!23 = !DILocation(line: 3, scope: !21) +!24 = !DILocation(line: 4, scope: !21) +!25 = !DILocation(line: 5, scope: !21) +!26 = !DILocation(line: 6, scope: !21) +!27 = !DILocation(line: 7, scope: !21) +!28 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !2, retainedNodes: !0) +!29 = !DILocation(line: 2, scope: !28) +!30 = !DILocation(line: 3, scope: !28) +!31 = !DILocation(line: 4, scope: !28) +!32 = !DILocation(line: 5, scope: !28) +!33 = !DILocation(line: 6, scope: !28) +!34 = !DILocation(line: 7, scope: !28) +!35 = !DILocation(line: 8, scope: !28) +!36 = !DILocation(line: 9, scope: !28) +!37 = !DILocation(line: 10, scope: !28) +!38 = !DILocation(line: 11, scope: !28) +!39 = !DILocation(line: 12, scope: !28) +!40 = !DILocation(line: 13, scope: !28) diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test 
index 4662abebc4138..e3b9fb46b5c3c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test @@ -304,6 +304,33 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 -DFLAG_VALUE=0x3E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 -DFLAG_VALUE=0x3E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1034 -DFLAG_VALUE=0x3E + +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 -DFLAG_VALUE=0x3D +# +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 -DFLAG_VALUE=0x3D +# +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1035 -DFLAG_VALUE=0x3D +# +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3" # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F diff --git a/llvm/unittests/ADT/PointerIntPairTest.cpp b/llvm/unittests/ADT/PointerIntPairTest.cpp index 8a42e5b9f5571..eef79745f4c7d 100644 --- 
a/llvm/unittests/ADT/PointerIntPairTest.cpp +++ b/llvm/unittests/ADT/PointerIntPairTest.cpp @@ -62,6 +62,10 @@ TEST(PointerIntPairTest, GetSet) { EXPECT_EQ(&s, Pair2.getPointer()); EXPECT_EQ(E::Case3, Pair2.getInt()); + auto [Pointer2, Int2] = Pair2; + EXPECT_EQ(Pair2.getPointer(), Pointer2); + EXPECT_EQ(Pair2.getInt(), Int2); + static_assert(std::is_trivially_copyable>::value, "trivially copyable"); } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index a19ffb2ef2516..b1bf81adc257d 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -1033,10 +1033,10 @@ void InstrInfoEmitter::run(raw_ostream &OS) { OS << "struct " << ClassName << " : public TargetInstrInfo {\n" << " explicit " << ClassName << "(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, " - "unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u);\n" + "unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u, " + "unsigned CopyOpcode = TargetOpcode::COPY);\n" << " ~" << ClassName << "() override = default;\n"; - OS << "\n};\n} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_HEADER\n\n"; @@ -1067,9 +1067,9 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << "InstrComplexDeprecationInfos[];\n"; OS << ClassName << "::" << ClassName << "(unsigned CFSetupOpcode, unsigned CFDestroyOpcode, unsigned " - "CatchRetOpcode, unsigned ReturnOpcode)\n" + "CatchRetOpcode, unsigned ReturnOpcode, unsigned CopyOpcode)\n" << " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode, CatchRetOpcode, " - "ReturnOpcode) {\n" + "ReturnOpcode, CopyOpcode) {\n" << " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "; if (HasDeprecationFeatures) diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp index eb33071dc1ae7..27e9c02966d6b 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -265,7 +265,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { // This constant must always match the default code object ABI version // of the AMDGPU backend. - addControlConstant("__oclc_ABI_version", 400, 32); + addControlConstant("__oclc_ABI_version", 500, 32); } // Determine libraries we need to link - order matters due to dependencies diff --git a/openmp/libompd/CMakeLists.txt b/openmp/libompd/CMakeLists.txt index 9915a937f486a..06a4fc7d932fd 100644 --- a/openmp/libompd/CMakeLists.txt +++ b/openmp/libompd/CMakeLists.txt @@ -24,6 +24,10 @@ if(LIBOMP_OMPD_SUPPORT) "Install path for hwloc library") if(LIBOMP_OMPD_GDB_SUPPORT) + find_package(LLVM) # Required for LLVM dynamic library support add_subdirectory(gdb-plugin) + # temporarily disabled + # test/CMakeLists.txt breaks DeviceRTL in LLVM_ENABLE_PROJECTS builds. + # add_subdirectory(test) endif() endif() diff --git a/openmp/libompd/gdb-plugin/CMakeLists.txt b/openmp/libompd/gdb-plugin/CMakeLists.txt index 1f7c87f7a9d3f..ca6f9cfcde98c 100644 --- a/openmp/libompd/gdb-plugin/CMakeLists.txt +++ b/openmp/libompd/gdb-plugin/CMakeLists.txt @@ -58,9 +58,13 @@ find_package (PythonLibs REQUIRED) include_directories (${OMPD_INCLUDE_PATH}) include_directories (${LIBOMP_INCLUDE_DIR}) + +# Needed for dlsym in the module. 
+find_library(CLANG_CPP clang-cpp HINTS ${LLVM_LIBRARY_DIR} ${LLVM_LIBRARY_DIR}/../lib REQUIRED) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/python-module/ompd/__init__.py - DEPENDS ompdModule.c ompdAPITests.c setup.py ompd/frame_filter.py ompd/__init__.py ompd/ompd_address_space.py ompd/ompd_callbacks.py ompd/ompd_handles.py ompd/ompd.py - COMMAND ${CMAKE_COMMAND} -E env LIBOMP_INCLUDE_DIR=${LIBOMP_INCLUDE_DIR} + DEPENDS ompdModule.c DLSymService.cpp ompdAPITests.c setup.py ompd/frame_filter.py ompd/__init__.py ompd/ompd_address_space.py ompd/ompd_callbacks.py ompd/ompd_handles.py ompd/ompd.py + COMMAND ${CMAKE_COMMAND} -E env LIBOMP_INCLUDE_DIR=${LIBOMP_INCLUDE_DIR} LLVM_MAIN_INCLUDE_DIR=${LLVM_MAIN_INCLUDE_DIR} CLANG_CPP=${CLANG_CPP} INSTALL_LIB_LOC=${CLANG_CPP}../ ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/setup.py -v bdist_wheel -b ${CMAKE_CURRENT_BINARY_DIR}/build -d ${CMAKE_CURRENT_BINARY_DIR} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/setup.py clean --all COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_SOURCE_DIR}/ompd.egg-info @@ -71,12 +75,17 @@ add_custom_target(ompd_gdb_plugin ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/python-module/ompd/__init__.py COMMENT "Building the OMPD GDB plugin") -add_library (ompdModule MODULE ompdModule.c ompdAPITests.c) +add_library (ompdModule MODULE ompdModule.c ompdAPITests.c DLSymService.cpp) + +## Include LLVM headers for DynamicLibrary support +target_include_directories(ompdModule PRIVATE ${LLVM_INCLUDE_DIRS}) + include_directories ( ${LIBOMP_INCLUDE_DIR} ${LIBOMP_SRC_DIR} ${Python3_INCLUDE_DIRS} ) + target_link_libraries (ompdModule ${Python3_LIBRARIES}) target_link_libraries (ompdModule ${CMAKE_DL_LIBS}) diff --git a/openmp/libompd/gdb-plugin/DLSymService.cpp b/openmp/libompd/gdb-plugin/DLSymService.cpp new file mode 100644 index 0000000000000..a417c63e31df6 --- /dev/null +++ b/openmp/libompd/gdb-plugin/DLSymService.cpp @@ -0,0 +1,72 @@ +/* + * DLSymService.cpp + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DynamicLibrary.h" +#include <memory> +#include <string> + +bool ErrorOccured = false; +std::shared_ptr<llvm::sys::DynamicLibrary> OMPDLibrary = nullptr; + +void *getSymbolForFunction(const char *name) { + if (!OMPDLibrary || !OMPDLibrary->isValid()) { + ErrorOccured = true; + return nullptr; + } + + auto SymAddr = OMPDLibrary->getAddressOfSymbol(name); + if (!SymAddr) { + ErrorOccured = true; + } + // Leave the cast to the caller. + return SymAddr; +} + +void loadLibraryWithName(const char *name) { + if (OMPDLibrary && OMPDLibrary->isValid()) { + return; + } + + std::string errMsg; + OMPDLibrary = std::make_shared<llvm::sys::DynamicLibrary>( + llvm::sys::DynamicLibrary::getPermanentLibrary(name, &errMsg)); + // Only clear the error flag if the library actually loaded. + if (!OMPDLibrary->isValid()) { + ErrorOccured = true; + return; + } + ErrorOccured = false; +} + +bool errorOccured() { + bool oldVal = ErrorOccured; + ErrorOccured = false; + return oldVal; +} + +const char *getErrorStr() { + return "An error occurred"; +} + +extern "C" { +void *get_dlsym_for_name(const char *name) { + return getSymbolForFunction(name); +} + +void get_library_with_name(const char *name) { + loadLibraryWithName(name); +} + +const char *get_error() { + if (!errorOccured()) { + return nullptr; + } + return getErrorStr(); +} +} \ No newline at end of file diff --git a/openmp/libompd/gdb-plugin/DLSymService.h b/openmp/libompd/gdb-plugin/DLSymService.h new file mode 100644 index 0000000000000..82d76908e3743 --- /dev/null +++ b/openmp/libompd/gdb-plugin/DLSymService.h @@ -0,0 +1,21 @@ +/* + * DLSymService.h + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __cplusplus +extern "C" { +#endif +void *get_dlsym_for_name(const char *name); +void *get_library_with_name(const char *name); +const char *get_error(); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/openmp/libompd/gdb-plugin/ompdAPITests.c b/openmp/libompd/gdb-plugin/ompdAPITests.c index 912914c7b8c9b..36da959c31a13 100644 --- a/openmp/libompd/gdb-plugin/ompdAPITests.c +++ b/openmp/libompd/gdb-plugin/ompdAPITests.c @@ -1,3 +1,4 @@ +#include "DLSymService.h" #include #include #include @@ -7,7 +8,6 @@ #include #include -extern void *ompd_library; struct _ompd_aspace_cont { int id; @@ -810,7 +810,7 @@ PyObject *test_ompd_initialize(PyObject *self, PyObject *noargs) { printf("Test: With Correct Arguments.\n"); ompd_rc_t (*my_ompd_init)(ompd_word_t version, ompd_callbacks_t *) = - dlsym(ompd_library, "ompd_initialize"); + get_dlsym_for_name("ompd_initialize"); rc = my_ompd_init(version, &table); if (rc != ompd_rc_ok) { printf("Failed, with return code = %d\n", rc); diff --git a/openmp/libompd/gdb-plugin/ompdModule.c b/openmp/libompd/gdb-plugin/ompdModule.c index df602c54aba37..9776a3ecccd58 100644 --- a/openmp/libompd/gdb-plugin/ompdModule.c +++ b/openmp/libompd/gdb-plugin/ompdModule.c @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// +#include "DLSymService.h" + #include #include // #include -#include #include #include #include #include #include -void *ompd_library; #define OMPD_WEAK_ATTR __attribute__((weak)) @@ -41,8 +41,8 @@ ompd_rc_t _print(const char *str, int category); OMPD_WEAK_ATTR ompd_rc_t ompd_get_api_version(ompd_word_t *addr) { static ompd_rc_t (*my_get_api_version)(ompd_word_t *) = NULL; if (!my_get_api_version) { - my_get_api_version = dlsym(ompd_library, "ompd_get_api_version"); - if (dlerror()) { + my_get_api_version = get_dlsym_for_name("ompd_get_api_version"); + if (get_error()) { return ompd_rc_error; } } @@ -52,8 +52,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_api_version(ompd_word_t *addr) { OMPD_WEAK_ATTR ompd_rc_t ompd_get_version_string(const char **string) { static ompd_rc_t (*my_get_version_string)(const char **) = NULL; if (!my_get_version_string) { - my_get_version_string = dlsym(ompd_library, "ompd_get_version_string"); - if (dlerror()) { + my_get_version_string = get_dlsym_for_name("ompd_get_version_string"); + if (get_error()) { return ompd_rc_error; } } @@ -63,8 +63,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_version_string(const char **string) { OMPD_WEAK_ATTR ompd_rc_t ompd_finalize(void) { static ompd_rc_t (*my_ompd_finalize)(void) = NULL; if (!my_ompd_finalize) { - my_ompd_finalize = dlsym(ompd_library, "ompd_finalize"); - if (dlerror()) { + my_ompd_finalize = get_dlsym_for_name("ompd_finalize"); + if (get_error()) { return ompd_rc_error; } } @@ -77,8 +77,8 @@ ompd_process_initialize(ompd_address_space_context_t *context, static ompd_rc_t (*my_ompd_process_initialize)( ompd_address_space_context_t *, ompd_address_space_handle_t **) = NULL; if (!my_ompd_process_initialize) { - my_ompd_process_initialize = dlsym(ompd_library, "ompd_process_initialize"); - if (dlerror()) { + my_ompd_process_initialize = get_dlsym_for_name("ompd_process_initialize"); + if (get_error()) { return ompd_rc_error; } } @@ -90,8 +90,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_omp_version( static ompd_rc_t (*my_ompd_get_omp_version)(ompd_address_space_handle_t *, 
ompd_word_t *) = NULL; if (!my_ompd_get_omp_version) { - my_ompd_get_omp_version = dlsym(ompd_library, "ompd_get_omp_version"); - if (dlerror()) { + my_ompd_get_omp_version = get_dlsym_for_name("ompd_get_omp_version"); + if (get_error()) { return ompd_rc_error; } } @@ -104,8 +104,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_omp_version_string( ompd_address_space_handle_t *, const char **) = NULL; if (!my_ompd_get_omp_version_string) { my_ompd_get_omp_version_string = - dlsym(ompd_library, "ompd_get_omp_version_string"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_omp_version_string"); + if (get_error()) { return ompd_rc_error; } } @@ -119,8 +119,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_thread_handle( ompd_address_space_handle_t *, ompd_thread_id_t, ompd_size_t, const void *, ompd_thread_handle_t **) = NULL; if (!my_get_thread_handle) { - my_get_thread_handle = dlsym(ompd_library, "ompd_get_thread_handle"); - if (dlerror()) { + my_get_thread_handle = get_dlsym_for_name("ompd_get_thread_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -134,8 +134,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_thread_in_parallel( ompd_thread_handle_t **) = NULL; if (!my_get_thread_in_parallel) { my_get_thread_in_parallel = - dlsym(ompd_library, "ompd_get_thread_in_parallel"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_thread_in_parallel"); + if (get_error()) { return ompd_rc_error; } } @@ -148,9 +148,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_thread_handle_compare( static ompd_rc_t (*my_thread_handle_compare)( ompd_thread_handle_t *, ompd_thread_handle_t *, int *) = NULL; if (!my_thread_handle_compare) { - my_thread_handle_compare = - dlsym(ompd_library, "ompd_thread_handle_compare"); - if (dlerror()) { + my_thread_handle_compare = get_dlsym_for_name("ompd_thread_handle_compare"); + if (get_error()) { return ompd_rc_error; } } @@ -164,8 +163,8 @@ ompd_get_curr_parallel_handle(ompd_thread_handle_t *threadHandle, ompd_thread_handle_t *, ompd_parallel_handle_t **) = NULL; if (!my_get_current_parallel_handle) { my_get_current_parallel_handle = - dlsym(ompd_library, "ompd_get_curr_parallel_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_curr_parallel_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -179,8 +178,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_parallel_handle_compare( ompd_parallel_handle_t *, ompd_parallel_handle_t *, int *) = NULL; if (!my_parallel_handle_compare) { my_parallel_handle_compare = - dlsym(ompd_library, "ompd_parallel_handle_compare"); - if (dlerror()) { + get_dlsym_for_name("ompd_parallel_handle_compare"); + if (get_error()) { return ompd_rc_error; } } @@ -195,8 +194,8 @@ ompd_get_enclosing_parallel_handle(ompd_parallel_handle_t *parallelHandle, ompd_parallel_handle_t *, ompd_parallel_handle_t **) = NULL; if (!my_get_enclosing_parallel_handle) { my_get_enclosing_parallel_handle = - dlsym(ompd_library, "ompd_get_enclosing_parallel_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_enclosing_parallel_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -210,8 +209,8 @@ ompd_get_task_parallel_handle(ompd_task_handle_t *taskHandle, ompd_task_handle_t *, ompd_parallel_handle_t **) = NULL; if (!my_get_task_parallel_handle) { my_get_task_parallel_handle = - dlsym(ompd_library, "ompd_get_task_parallel_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_task_parallel_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -224,8 +223,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_curr_task_handle( ompd_task_handle_t **) = NULL; if (!my_get_current_task_handle) { 
my_get_current_task_handle = - dlsym(ompd_library, "ompd_get_curr_task_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_curr_task_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -238,8 +237,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_generating_task_handle( ompd_task_handle_t *, ompd_task_handle_t **) = NULL; if (!my_get_generating_task_handle) { my_get_generating_task_handle = - dlsym(ompd_library, "ompd_get_generating_task_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_generating_task_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -252,8 +251,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_scheduling_task_handle( ompd_task_handle_t *, ompd_task_handle_t **) = NULL; if (!my_get_scheduling_task_handle) { my_get_scheduling_task_handle = - dlsym(ompd_library, "ompd_get_scheduling_task_handle"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_scheduling_task_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -266,8 +265,8 @@ ompd_get_task_in_parallel(ompd_parallel_handle_t *parallelHandle, int threadNum, static ompd_rc_t (*my_get_task_in_parallel)(ompd_parallel_handle_t *, int, ompd_task_handle_t **) = NULL; if (!my_get_task_in_parallel) { - my_get_task_in_parallel = dlsym(ompd_library, "ompd_get_task_in_parallel"); - if (dlerror()) { + my_get_task_in_parallel = get_dlsym_for_name("ompd_get_task_in_parallel"); + if (get_error()) { return ompd_rc_error; } } @@ -280,8 +279,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_task_frame(ompd_task_handle_t *taskHandle, static ompd_rc_t (*my_get_task_frame)( ompd_task_handle_t *, ompd_frame_info_t *, ompd_frame_info_t *) = NULL; if (!my_get_task_frame) { - my_get_task_frame = dlsym(ompd_library, "ompd_get_task_frame"); - if (dlerror()) { + my_get_task_frame = get_dlsym_for_name("ompd_get_task_frame"); + if (get_error()) { return ompd_rc_error; } } @@ -295,8 +294,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_icv_from_scope(void *handle, static ompd_rc_t (*my_get_icv_from_scope)(void *, ompd_scope_t, ompd_icv_id_t, ompd_word_t *) = NULL; if (!my_get_icv_from_scope) { - my_get_icv_from_scope = dlsym(ompd_library, "ompd_get_icv_from_scope"); - if (dlerror()) { + my_get_icv_from_scope = get_dlsym_for_name("ompd_get_icv_from_scope"); + if (get_error()) { return ompd_rc_error; } } @@ -311,8 +310,8 @@ ompd_enumerate_icvs(ompd_address_space_handle_t *handle, ompd_icv_id_t current, ompd_address_space_handle_t *, ompd_icv_id_t, ompd_icv_id_t *, const char **, ompd_scope_t *, int *) = NULL; if (!my_enumerate_icvs) { - my_enumerate_icvs = dlsym(ompd_library, "ompd_enumerate_icvs"); - if (dlerror()) { + my_enumerate_icvs = get_dlsym_for_name("ompd_enumerate_icvs"); + if (get_error()) { return ompd_rc_error; } } @@ -327,8 +326,8 @@ ompd_enumerate_states(ompd_address_space_handle_t *addrSpaceHandle, ompd_word_t, ompd_word_t *, const char **, ompd_word_t *) = NULL; if (!my_enumerate_states) { - my_enumerate_states = dlsym(ompd_library, "ompd_enumerate_states"); - if (dlerror()) { + my_enumerate_states = get_dlsym_for_name("ompd_enumerate_states"); + if (get_error()) { return ompd_rc_error; } } @@ -342,8 +341,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_state(ompd_thread_handle_t *threadHandle, static ompd_rc_t (*my_get_state)(ompd_thread_handle_t *, ompd_word_t *, ompd_wait_id_t *) = NULL; if (!my_get_state) { - my_get_state = dlsym(ompd_library, "ompd_get_state"); - if (dlerror()) { + my_get_state = get_dlsym_for_name("ompd_get_state"); + if (get_error()) { return ompd_rc_error; } } @@ -355,8 +354,8 @@ OMPD_WEAK_ATTR ompd_rc_t 
ompd_get_task_function(ompd_task_handle_t *taskHandle, static ompd_rc_t (*my_get_task_function)(ompd_task_handle_t *, ompd_address_t *) = NULL; if (!my_get_task_function) { - my_get_task_function = dlsym(ompd_library, "ompd_get_task_function"); - if (dlerror()) { + my_get_task_function = get_dlsym_for_name("ompd_get_task_function"); + if (get_error()) { return ompd_rc_error; } } @@ -369,8 +368,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_thread_id(ompd_thread_handle_t *threadHandle, static ompd_rc_t (*my_get_thread_id)(ompd_thread_handle_t *, ompd_thread_id_t, ompd_size_t, void *) = NULL; if (!my_get_thread_id) { - my_get_thread_id = dlsym(ompd_library, "ompd_get_thread_id"); - if (dlerror()) { + my_get_thread_id = get_dlsym_for_name("ompd_get_thread_id"); + if (get_error()) { return ompd_rc_error; } } @@ -383,8 +382,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_get_tool_data(void *handle, ompd_scope_t scope, static ompd_rc_t (*my_get_tool_data)(void *, ompd_scope_t, ompd_word_t *, ompd_address_t *) = NULL; if (!my_get_tool_data) { - my_get_tool_data = dlsym(ompd_library, "ompd_get_tool_data"); - if (dlerror()) { + my_get_tool_data = get_dlsym_for_name("ompd_get_tool_data"); + if (get_error()) { return ompd_rc_error; } } @@ -398,8 +397,8 @@ ompd_get_icv_string_from_scope(void *handle, ompd_scope_t scope, void *, ompd_scope_t, ompd_icv_id_t, const char **) = NULL; if (!my_get_icv_string_from_scope) { my_get_icv_string_from_scope = - dlsym(ompd_library, "ompd_get_icv_string_from_scope"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_icv_string_from_scope"); + if (get_error()) { return ompd_rc_error; } } @@ -410,8 +409,8 @@ OMPD_WEAK_ATTR ompd_rc_t ompd_rel_thread_handle(ompd_thread_handle_t *threadHandle) { static ompd_rc_t (*my_release_thread_handle)(ompd_thread_handle_t *) = NULL; if (!my_release_thread_handle) { - my_release_thread_handle = dlsym(ompd_library, "ompd_rel_thread_handle"); - if (dlerror()) { + my_release_thread_handle = get_dlsym_for_name("ompd_rel_thread_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -423,9 +422,8 @@ ompd_rel_parallel_handle(ompd_parallel_handle_t *parallelHandle) { static ompd_rc_t (*my_release_parallel_handle)(ompd_parallel_handle_t *) = NULL; if (!my_release_parallel_handle) { - my_release_parallel_handle = - dlsym(ompd_library, "ompd_rel_parallel_handle"); - if (dlerror()) { + my_release_parallel_handle = get_dlsym_for_name("ompd_rel_parallel_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -435,8 +433,8 @@ ompd_rel_parallel_handle(ompd_parallel_handle_t *parallelHandle) { OMPD_WEAK_ATTR ompd_rc_t ompd_rel_task_handle(ompd_task_handle_t *taskHandle) { static ompd_rc_t (*my_release_task_handle)(ompd_task_handle_t *) = NULL; if (!my_release_task_handle) { - my_release_task_handle = dlsym(ompd_library, "ompd_rel_task_handle"); - if (dlerror()) { + my_release_task_handle = get_dlsym_for_name("ompd_rel_task_handle"); + if (get_error()) { return ompd_rc_error; } } @@ -449,8 +447,8 @@ ompd_task_handle_compare(ompd_task_handle_t *task_handle_1, static ompd_rc_t (*my_task_handle_compare)( ompd_task_handle_t *, ompd_task_handle_t *, int *) = NULL; if (!my_task_handle_compare) { - my_task_handle_compare = dlsym(ompd_library, "ompd_task_handle_compare"); - if (dlerror()) { + my_task_handle_compare = get_dlsym_for_name("ompd_task_handle_compare"); + if (get_error()) { return ompd_rc_error; } } @@ -464,8 +462,8 @@ ompd_get_display_control_vars(ompd_address_space_handle_t *address_space_handle, ompd_address_space_handle_t *, const char *const **) = NULL; if 
(!my_ompd_get_display_control_vars) { my_ompd_get_display_control_vars = - dlsym(ompd_library, "ompd_get_display_control_vars"); - if (dlerror()) { + get_dlsym_for_name("ompd_get_display_control_vars"); + if (get_error()) { return ompd_rc_error; } } @@ -480,15 +478,15 @@ ompd_get_display_control_vars(ompd_address_space_handle_t *address_space_handle, */ static PyObject *ompd_open(PyObject *self, PyObject *args) { const char *name, *dlerr; - dlerror(); + get_error(); if (!PyArg_ParseTuple(args, "s", &name)) { return Py_BuildValue("i", -1); } - ompd_library = dlopen(name, RTLD_LAZY); - if ((dlerr = dlerror())) { + get_library_with_name(name); + if (get_error()) { return Py_BuildValue("i", -2); } - if (dlerror()) { + if (get_error()) { return Py_BuildValue("i", -3); } ompd_word_t version; @@ -825,7 +823,7 @@ static PyObject *call_ompd_initialize(PyObject *self, PyObject *noargs) { NULL, _read_string, _endianess, _endianess, _thread_context}; ompd_rc_t (*my_ompd_init)(ompd_word_t version, ompd_callbacks_t *) = - dlsym(ompd_library, "ompd_initialize"); + get_dlsym_for_name("ompd_initialize"); ompd_rc_t returnInit = my_ompd_init(201811, &table); if (returnInit != ompd_rc_ok) { _printf("An error occurred when calling ompd_initialize! Error code: %d", @@ -834,7 +832,7 @@ static PyObject *call_ompd_initialize(PyObject *self, PyObject *noargs) { ompd_address_space_handle_t *addr_space = NULL; ompd_rc_t (*my_proc_init)(ompd_address_space_context_t *, ompd_address_space_handle_t **) = - dlsym(ompd_library, "ompd_process_initialize"); + get_dlsym_for_name("ompd_process_initialize"); ompd_rc_t retProcInit = my_proc_init(&acontext, &addr_space); if (retProcInit != ompd_rc_ok) { _printf("An error occurred when calling ompd_process_initialize! Error " diff --git a/openmp/libompd/gdb-plugin/setup.py b/openmp/libompd/gdb-plugin/setup.py index ab5cb949bc67e..eee386292c211 100644 --- a/openmp/libompd/gdb-plugin/setup.py +++ b/openmp/libompd/gdb-plugin/setup.py @@ -4,8 +4,14 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) omp_include_dir = os.environ.get('LIBOMP_INCLUDE_DIR', dir_path) +llvm_include_dir = os.environ.get('LLVM_MAIN_INCLUDE_DIR', dir_path) python_include_dir = os.environ.get('PYTHON_HEADERS', dir_path) +# Needed for dlsym +clang_cpp = os.environ.get('CLANG_CPP', dir_path) +clang_cpp_dir = clang_cpp.split('libclang-cpp.so') +install_lib_loc = os.environ.get('INSTALL_LIB_LOC', dir_path) + print("find_packages : ", find_packages()) setup( name='ompd', @@ -13,5 +19,5 @@ py_modules=['loadompd'], setup_requires=['wheel'], packages=find_packages(), - ext_modules=[Extension('ompd.ompdModule', [dir_path+'/ompdModule.c', dir_path+'/ompdAPITests.c'], include_dirs=[omp_include_dir])] + ext_modules=[Extension('ompd.ompdModule', [dir_path+'/ompdModule.c', dir_path+'/ompdAPITests.c', dir_path+'/DLSymService.cpp'], include_dirs=[omp_include_dir, llvm_include_dir], runtime_library_dirs=[install_lib_loc], libraries=['clang-cpp'], library_dirs=[clang_cpp_dir[0]])] ) diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h index 65957a17ae81c..012c0cea41fdb 100644 --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -319,6 +319,40 @@ void __kmpc_xteam_min_i(int, int *); void __kmpc_xteam_min_ui(unsigned int, unsigned int *); void __kmpc_xteam_min_l(long int, long int *); void __kmpc_xteam_min_ul(unsigned long, unsigned long *); + +/// __kmpc_xteamr__: Helper functions for Cross Team 
reductions +/// arg1: the thread local reduction value. +/// arg2: pointer to where result is written. +/// arg3: global array of team values for this reduction instance. +/// arg4: atomic counter of completed teams for this reduction instance. +void __kmpc_xteamr_sum_d(double, double *, double *, uint32_t *); +void __kmpc_xteamr_sum_f(float, float *, float *, uint32_t *); +void __kmpc_xteamr_sum_cd(double _Complex, double _Complex *, double _Complex *, + uint32_t *); +void __kmpc_xteamr_sum_cf(float _Complex, float _Complex *, float _Complex *, + uint32_t *); +void __kmpc_xteamr_sum_i(int, int *, int *, uint32_t *); +void __kmpc_xteamr_sum_ui(unsigned int, unsigned int *, unsigned int *, + uint32_t *); +void __kmpc_xteamr_sum_l(long int, long int *, long int *, uint32_t *); +void __kmpc_xteamr_sum_ul(unsigned long, unsigned long *, unsigned long *, + uint32_t *); +void __kmpc_xteamr_max_d(double, double *, double *, uint32_t *); +void __kmpc_xteamr_max_f(float, float *, float *, uint32_t *); +void __kmpc_xteamr_max_i(int, int *, int *, uint32_t *); +void __kmpc_xteamr_max_ui(unsigned int, unsigned int *, unsigned int *, + uint32_t *); +void __kmpc_xteamr_max_l(long int, long int *, long int *, uint32_t *); +void __kmpc_xteamr_max_ul(unsigned long, unsigned long *, unsigned long *, + uint32_t *); +void __kmpc_xteamr_min_d(double, double *, double *, uint32_t *); +void __kmpc_xteamr_min_f(float, float *, float *, uint32_t *); +void __kmpc_xteamr_min_i(int, int *, int *, uint32_t *); +void __kmpc_xteamr_min_ui(unsigned int, unsigned int *, unsigned int *, + uint32_t *); +void __kmpc_xteamr_min_l(long int, long int *, long int *, uint32_t *); +void __kmpc_xteamr_min_ul(unsigned long, unsigned long *, unsigned long *, + uint32_t *); ///} /// Synchronization diff --git a/openmp/libomptarget/DeviceRTL/include/Xteamr.h b/openmp/libomptarget/DeviceRTL/include/Xteamr.h index 052e699c6c39e..f0947c0777ce8 100644 --- a/openmp/libomptarget/DeviceRTL/include/Xteamr.h +++ b/openmp/libomptarget/DeviceRTL/include/Xteamr.h @@ -55,294 +55,928 @@ extern "C" { /// \param Reduction null value /// \param Outer loop iteration value, 0 to numteams*numthreads /// \param Number of teams + +/// External intra-team reduction (iteamr) helper functions +/// +/// The name template for intra-team helper functions is +/// __kmpc_iteamr__x where +/// is letter(s) representing data type, e.g. d=double +/// number of waves in thread block +/// warp size, 32 or 64 +/// All iteamr helper functions are defined in Xteamr.cpp. They each call the +/// internal templated function _iteam_reduction also defined in Xteamr.cpp. +/// +/// \param Input thread local reduction value +/// \param Pointer to result value +/// \param Function pointer to reduction function (sum,min,max) +/// \param Function pointer to reduction function on LDS memory +/// \param Reduction null value +/// \param Outer loop iteration value, 0 to numthreads +/// +/// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_d_16x64( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_d_16x64(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_f_16x64( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_16x64(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_cd_16x64( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_16x64(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_cf_16x64( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_16x64(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_i_16x64( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_16x64(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_ui_16x64( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_16x64(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_l_16x64( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_l_16x64(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_ul_16x64( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_16x64(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_d_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_d_8x64( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_8x64(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_f_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_f_8x64( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_8x64(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cd_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_cd_8x64( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_8x64(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cf_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_cf_8x64( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_8x64(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. 
-void _INLINE_ATTR_ __kmpc_xteamr_i_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_i_8x64( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_8x64(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ui_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_ui_8x64( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_8x64(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_l_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_l_8x64( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_8x64(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ul_32x32( +void _INLINE_ATTR_ __kmpc_xteamr_ul_8x64( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_8x64(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_d_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_d_4x64( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_4x64(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_f_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_f_4x64( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_f_4x64(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cd_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_cd_4x64( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_4x64(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cf_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_cf_4x64( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_4x64(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_i_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_i_4x64( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_4x64(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ui_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_ui_4x64( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_4x64(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_l_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_l_4x64( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_4x64(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ul_8x64( +void _INLINE_ATTR_ __kmpc_xteamr_ul_4x64( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_ul_4x64(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_d_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_d_2x64( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_2x64(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_f_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_f_2x64( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_2x64(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cd_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_cd_2x64( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_2x64(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cf_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_cf_2x64( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_2x64(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_i_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_i_2x64( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_2x64(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. 
-void _INLINE_ATTR_ __kmpc_xteamr_ui_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_ui_2x64( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_2x64(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_l_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_l_2x64( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_2x64(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ul_16x32( +void _INLINE_ATTR_ __kmpc_xteamr_ul_2x64( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_2x64(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_d_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_d_1x64( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_1x64(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_f_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_f_1x64( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_1x64(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cd_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_cd_1x64( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_cd_1x64(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_cf_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_cf_1x64( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_1x64(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_i_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_i_1x64( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_1x64(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ui_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_ui_1x64( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_1x64(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_l_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_l_1x64( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_1x64(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. -void _INLINE_ATTR_ __kmpc_xteamr_ul_4x64( +void _INLINE_ATTR_ __kmpc_xteamr_ul_1x64( + _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_1x64(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_d_32x32( + double v, double *r_ptr, double *tvs, uint32_t *td, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_d_32x32(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_f_32x32( + float v, float *r_ptr, float *tvs, uint32_t *td, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_32x32(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cd_32x32( + _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_32x32(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cf_32x32( + _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_32x32(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_i_32x32( + int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_32x32(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ui_32x32( + _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_32x32(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_l_32x32( + long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_l_32x32(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ul_32x32( + _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_32x32(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_d_16x32( + double v, double *r_ptr, double *tvs, uint32_t *td, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_16x32(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_f_16x32( + float v, float *r_ptr, float *tvs, uint32_t *td, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_16x32(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cd_16x32( + _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_16x32(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cf_16x32( + _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_16x32(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_i_16x32( + int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_i_16x32(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ui_16x32( + _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_16x32(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_l_16x32( + long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_16x32(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ul_16x32( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_16x32(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_d_8x32( double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_8x32(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_f_8x32( float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_8x32(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_cd_8x32( _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_cd_8x32(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_cf_8x32( _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_8x32(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_i_8x32( int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_8x32(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_ui_8x32( _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_8x32(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_l_8x32( long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_8x32(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); /// Cross team reduction (xteamr) helper function, see documentation above. void _INLINE_ATTR_ __kmpc_xteamr_ul_8x32( _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, const uint64_t k, const uint32_t numteams); - -/// Built-in pair reduction functions used as a function pointer in arguments -/// to cross team reduction (xteamr) helper functions defined above. -/// -/// The template for the name of built-in pair reduction functions is -/// __kmpc_rfun__ where -/// is function name (e.g.sum,min,max) -/// is letter(s) representing data type, e.g. d=double -/// -/// All built-in pair reduction functions are defined in Xteamr.cpp. -/// Clang/flang code generation for C, C++, and FORTRAN use function pointers -/// to built-in pair reduction functions when generating a call to xteamr -/// helper functions. -/// -/// \param Pointer to first TLS value where result is placed -/// \param The 2nd TLS value used in the pair reduction function +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_ul_8x32(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_d_4x32( + double v, double *r_ptr, double *tvs, uint32_t *td, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_4x32(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_f_4x32( + float v, float *r_ptr, float *tvs, uint32_t *td, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_4x32(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cd_4x32( + _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_4x32(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cf_4x32( + _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cf_4x32(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_i_4x32( + int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_4x32(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ui_4x32( + _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_ui_4x32(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_l_4x32( + long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_4x32(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ul_4x32( + _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_4x32(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_d_2x32( + double v, double *r_ptr, double *tvs, uint32_t *td, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_d_2x32(double v, double *r_ptr, + void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, + _RF_LDS double *), + const double rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_f_2x32( + float v, float *r_ptr, float *tvs, uint32_t *td, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_f_2x32(float v, float *r_ptr, + void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, + _RF_LDS float *), + const float rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cd_2x32( + _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_cd_2x32(_CD v, _CD *r_ptr, + void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, + _RF_LDS _CD *), + const _CD rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_cf_2x32( + _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. 
+void _INLINE_ATTR_ __kmpc_iteamr_cf_2x32(_CF v, _CF *r_ptr, + void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, + _RF_LDS _CF *), + const _CF rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_i_2x32( + int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_i_2x32(int v, int *r_ptr, + void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, + _RF_LDS int *), + const int rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ui_2x32( + _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ui_2x32(_UI v, _UI *r_ptr, + void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, + _RF_LDS _UI *), + const _UI rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_l_2x32( + long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_l_2x32(long v, long *r_ptr, + void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, + _RF_LDS long *), + const long rnv, const uint64_t k); +/// Cross team reduction (xteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_xteamr_ul_2x32( + _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t numteams); +/// Intra-team reduction (iteamr) helper function, see documentation above. +void _INLINE_ATTR_ __kmpc_iteamr_ul_2x32(_UL v, _UL *r_ptr, + void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, + _RF_LDS _UL *), + const _UL rnv, const uint64_t k); +/// Built-in pair reduction function, see documentation above. void __kmpc_rfun_sum_d(double *val, double otherval); - -/// LDS Built-in pair reduction functions used as a function pointer in -/// arguments to cross team reduction (xteamr) helper functions. -/// The LDS pair reduction function only differs from the pair reduction -/// function in that the arguments use LDS storage. -/// -/// The template for the name of LDS built-in pair reduction functions is -/// __kmpc_rfun__lds_ where -/// is function name (e.g.sum,min,max) -/// is letter(s) representing data type, e.g. d=double -/// -/// All built-in pair reduction functions are defined in Xteamr.cpp. -/// Clang/flang code generation for C, C++, and FORTRAN use function pointers -/// to built-in pair reduction functions when generating a call to xteamr -/// helper functions. -/// -/// \param Pointer to the 1st value in LDS storage where result is placed. -/// \param Pointer to the 2nd value in LDS storage. +/// LDS Built-in pair reduction function, see documentation above. 
void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval); - /// Built-in pair reduction function, see documentation above. void __kmpc_rfun_sum_f(float *val, float otherval); /// LDS Built-in pair reduction function, see documentation above. diff --git a/openmp/libomptarget/DeviceRTL/src/Xteamr.cpp b/openmp/libomptarget/DeviceRTL/src/Xteamr.cpp index 7dfe532982487..1eb1780bd90ad 100644 --- a/openmp/libomptarget/DeviceRTL/src/Xteamr.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Xteamr.cpp @@ -177,6 +177,51 @@ T xteamr_shfl_xor(T var, const int lane_mask) { return xteamr_shfl_xor<_WSZ>(tag(), var, lane_mask); } +/// Templated internal function used by extern intra-team reductions +/// +/// \param Template typename parameter T +/// \param Template parameter for number of waves, must be power of two +/// \param Template parameter for warp size, 32 o 64 +/// +/// \param Input thread local (TLS) value for warp shfl reduce +/// \param Pointer to result value, also used in final reduction +/// \param Function pointer to TLS pair reduction function +/// \param Function pointer to LDS pair reduction function +/// \param Reduction null value, used for partial waves +/// \param The iteration value from 0 to (NumTeams*_NUM_THREADS)-1 +/// +template +__attribute__((flatten, always_inline)) void _iteam_reduction( + T val, T *r_ptr, void (*_rf)(T *, T), + void (*_rf_lds)(__XTEAM_SHARED_LDS T *, __XTEAM_SHARED_LDS T *), + const T rnv, const uint64_t k) { + constexpr uint32_t _NT = _NW * _WSZ; + const uint32_t omp_thread_num = k % _NT; + const uint32_t wave_num = omp_thread_num / _WSZ; + const uint32_t lane_num = omp_thread_num % _WSZ; + static __XTEAM_SHARED_LDS T xwave_lds[_NW]; + + // Binary reduce each wave, then copy to xwave_lds[wave_num] + for (unsigned int offset = _WSZ / 2; offset > 0; offset >>= 1) + (*_rf)(&val, xteamr_shfl_xor(val, offset)); + if (lane_num == 0) + xwave_lds[wave_num] = val; + + // Binary reduce all wave values into wave_lds[0] + _OMP::synchronize::threadsAligned(); + for (unsigned int offset = _NW / 2; offset > 0; offset >>= 1) { + if (omp_thread_num < offset) + (*_rf_lds)(&(xwave_lds[omp_thread_num]), + &(xwave_lds[omp_thread_num + offset])); + } + + // We only need xwave_lds[0] correct on thread 0. + if (omp_thread_num == 0) + *r_ptr = xwave_lds[0]; + + _OMP::synchronize::threadsAligned(); +} + /// Templated internal function used by all extern typed reductions /// /// \param Template typename parameter T @@ -207,7 +252,7 @@ __attribute__((flatten, always_inline)) void _xteam_reduction( const uint32_t wave_num = omp_thread_num / _WSZ; const uint32_t lane_num = omp_thread_num % _WSZ; - static __XTEAM_SHARED_LDS T xwave_lds[_NW + 1]; + static __XTEAM_SHARED_LDS T xwave_lds[_NW]; // Cuda may restrict max threads, so clear unused wave values #ifdef __NVPTX__ @@ -255,7 +300,6 @@ __attribute__((flatten, always_inline)) void _xteam_reduction( // All other teams exit the helper function. // To use TLS shfl reduce, copy team values to TLS val. - // NumTeams must be <= _NUM_THREADS here. val = (omp_thread_num < NumTeams) ? team_vals[omp_thread_num] : rnv; // Need sync here to prepare for TLS shfl reduce. @@ -267,21 +311,26 @@ __attribute__((flatten, always_inline)) void _xteam_reduction( if (lane_num == 0) xwave_lds[wave_num] = val; - // To get final result, we know wave_lds[0] is done - // Sync needed here to ensure wave_lds[i!=0] are correct. 
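The `_iteam_reduction` template added above reduces one team's values in two stages: each wave first combines its lanes with an XOR-shuffle butterfly, lane 0 of every wave publishes its partial into the `xwave_lds` array, and the wave partials are then tree-reduced in LDS so that thread 0 can store the team result through `r_ptr`. The host-side sketch below models that control flow with ordinary arrays and a sum pair-reduction in the style of `__kmpc_rfun_sum_d`; the 4-wave-by-8-lane geometry and the names `iteam_reduce_model` and `rfun_sum` are illustrative assumptions, not the runtime itself, which instantiates 1 to 32 waves with warp sizes of 32 or 64.

// Host-side model of the two-stage intra-team reduction (illustrative only).
#include <cstdint>
#include <cstdio>
#include <vector>

// Pair-reduction function in the style of __kmpc_rfun_sum_d.
static void rfun_sum(double *val, double otherval) { *val += otherval; }

// vals holds one team's thread-local values; rnv is the reduction null value
// used to pad a partial last wave.
static double iteam_reduce_model(std::vector<double> vals, double rnv) {
  constexpr uint32_t NW = 4, WSZ = 8, NT = NW * WSZ; // illustrative geometry
  vals.resize(NT, rnv);
  double xwave_lds[NW]; // stands in for the static LDS array of wave partials

  // Stage 1: per-wave butterfly, modeling val = rf(val, shfl_xor(val, offset)).
  for (uint32_t w = 0; w < NW; ++w) {
    std::vector<double> wave(vals.begin() + w * WSZ,
                             vals.begin() + (w + 1) * WSZ);
    for (uint32_t offset = WSZ / 2; offset > 0; offset >>= 1) {
      std::vector<double> prev = wave; // all lanes exchange simultaneously
      for (uint32_t lane = 0; lane < WSZ; ++lane)
        rfun_sum(&wave[lane], prev[lane ^ offset]);
    }
    xwave_lds[w] = wave[0]; // lane 0 publishes the wave partial
  }

  // Stage 2: tree-reduce the wave partials, as in the xwave_lds loop above.
  for (uint32_t offset = NW / 2; offset > 0; offset >>= 1)
    for (uint32_t t = 0; t < offset; ++t)
      rfun_sum(&xwave_lds[t], xwave_lds[t + offset]);

  return xwave_lds[0]; // thread 0 stores this through r_ptr
}

int main() {
  std::vector<double> vals(32);
  for (size_t i = 0; i < vals.size(); ++i)
    vals[i] = double(i);
  std::printf("team sum = %g\n", iteam_reduce_model(vals, 0.0)); // expects 496
  return 0;
}

Padding the last wave with the reduction null value `rnv` mirrors how the device code keeps a partial wave from perturbing the result.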
+ // Binary reduce all wave values into wave_lds[0] _OMP::synchronize::threadsAligned(); + for (unsigned int offset = _NW / 2; offset > 0; offset >>= 1) { + if (omp_thread_num < offset) + (*_rf_lds)(&(xwave_lds[omp_thread_num]), + &(xwave_lds[omp_thread_num + offset])); + } - // Typically only a few usable waves even for large GPUs. - // No gain parallelizing these last few reductions. - // So do reduction on thread 0 into lane 0's LDS val. if (omp_thread_num == 0) { - unsigned int usableWaves = ((NumTeams - 1) / _WSZ) + 1; // Reduce with the original result value. - xwave_lds[usableWaves] = *r_ptr; - for (unsigned int kk = 1; kk <= usableWaves; kk++) - (*_rf_lds)(&xwave_lds[0], &xwave_lds[kk]); + val = xwave_lds[0]; + (*_rf)(&val, *r_ptr); + + // If more teams than threads, do non-parallel reduction of extra + // team_vals. This loop iterates only if NumTeams>_NT. + for (unsigned int offset = _NT; offset < NumTeams; offset++) + (*_rf)(&val, team_vals[offset]); - *r_ptr = xwave_lds[0]; + // Write over the external result value. + *r_ptr = val; } // This sync needed to prevent warps in last team from starting @@ -297,495 +346,1056 @@ __attribute__((flatten, always_inline)) void _xteam_reduction( // number of waves in the team,and warpsize. // #define _EXT_ATTR extern "C" __attribute__((flatten, always_inline)) void +#define _CD double _Complex +#define _CF float _Complex +#define _UI unsigned int +#define _UL unsigned long +#define _LDS volatile __attribute__((address_space(3))) +_EXT_ATTR +__kmpc_xteamr_d_16x64(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_16x64(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_16x64(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_16x64(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_16x64(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 16, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_16x64(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 16, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_16x64(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 16, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_16x64(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 16, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_16x64(int v, int *r_p, int *tvs, 
uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_16x64(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_16x64(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 16, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_16x64(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 16, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_16x64(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_16x64(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_16x64(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 16, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_16x64(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 16, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_8x64(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_8x64(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_8x64(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_8x64(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_8x64(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 8, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_8x64(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 8, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_8x64(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + 
_xteam_reduction<_CF, 8, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_8x64(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 8, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_8x64(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_8x64(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_8x64(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 8, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_8x64(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 8, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_8x64(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_8x64(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_8x64(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 8, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_8x64(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 8, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_4x64(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_4x64(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_4x64(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_4x64(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_4x64(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 4, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_4x64(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + 
void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 4, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_4x64(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 4, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_4x64(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 4, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_4x64(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_4x64(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_4x64(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 4, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_4x64(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 4, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_4x64(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_4x64(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_4x64(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 4, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_4x64(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 4, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_2x64(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_2x64(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_2x64(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_2x64(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR 
+__kmpc_xteamr_cd_2x64(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 2, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_2x64(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 2, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_2x64(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 2, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_2x64(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 2, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_2x64(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_2x64(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_2x64(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 2, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_2x64(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 2, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_2x64(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_2x64(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_2x64(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 2, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_2x64(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 2, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_1x64(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_1x64(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_1x64(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float 
rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_1x64(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_1x64(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 1, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_1x64(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 1, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_1x64(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 1, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_1x64(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 1, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_1x64(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_1x64(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_1x64(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 1, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_1x64(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 1, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_1x64(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_1x64(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_1x64(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 1, 64>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_1x64(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 1, 64>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_32x32(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_32x32(double v, double 
*r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_32x32(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_32x32(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_32x32(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 32, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_32x32(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 32, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_32x32(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 32, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_32x32(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 32, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_32x32(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_32x32(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_32x32(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 32, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_32x32(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 32, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_32x32(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_32x32(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_32x32(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 32, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_32x32(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 32, 32>(v, r_p, 
rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_16x32(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_16x32(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_16x32(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_16x32(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), + const float rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_16x32(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 16, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_16x32(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 16, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_16x32(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 16, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_16x32(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 16, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_16x32(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_16x32(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_16x32(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 16, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_16x32(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 16, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_16x32(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_16x32(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_16x32(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void 
(*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 16, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_16x32(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 16, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_8x32(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_8x32(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} _EXT_ATTR -__kmpc_xteamr_d_16x64(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_f_16x64(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} - -_EXT_ATTR -__kmpc_xteamr_cd_16x64(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - double _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_16x64(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_16x64(int v, int *r_ptr, int *tvals, uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_16x64(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_l_16x64(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_16x64(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void 
(*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} - -_EXT_ATTR -__kmpc_xteamr_d_8x64(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_f_8x64(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_cd_8x64(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - const double _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_8x64(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - const float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, - iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_8x64(int v, int *r_ptr, int *tvals, uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_8x64(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_l_8x64(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_8x64(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} - -_EXT_ATTR -__kmpc_xteamr_d_4x64(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR 
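An aside on the pattern running through the entry points above and below: each exported symbol encodes the element type and the team geometry in its name (for example, __kmpc_xteamr_d_16x64 is the double variant for 16 waves of 64 lanes) and forwards to the matching _xteam_reduction<T, NumWaves, WaveSize> or _iteam_reduction<T, NumWaves, WaveSize> instantiation, taking a pair-reduction callback rf plus an LDS variant rflds. The host-compilable sketch below is illustrative only; the stub template, sum_pair, and the example_ wrapper are hypothetical stand-ins, not part of this patch.

#include <cstdio>

// Stub with the same template parameters as the device-side _xteam_reduction:
// element type, waves per team, and wave size.
template <typename T, int NumWaves, int WaveSize>
void xteam_reduction_stub(T v, T *result, void (*rf)(T *, T)) {
  // The real routine combines per-wave and cross-team partial values; here the
  // pair-reduction callback is applied once only to show the calling shape.
  rf(result, v);
}

// Pair-reduction callback matching the void (*rf)(double *, double) shape.
static void sum_pair(double *acc, double v) { *acc += v; }

// Naming pattern: suffix _d_16x64 maps to template arguments <double, 16, 64>.
extern "C" void example_kmpc_xteamr_d_16x64(double v, double *r_p,
                                            void (*rf)(double *, double)) {
  xteam_reduction_stub<double, 16, 64>(v, r_p, rf);
}

int main() {
  double result = 0.0;
  example_kmpc_xteamr_d_16x64(3.5, &result, sum_pair);
  std::printf("%g\n", result); // prints 3.5
}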
-__kmpc_xteamr_f_4x64(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_cd_4x64(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - const double _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_4x64(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - const float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, - iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_4x64(int v, int *r_ptr, int *tvals, uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_4x64(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_l_4x64(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_4x64(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} - -_EXT_ATTR -__kmpc_xteamr_d_32x32(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_f_32x32(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_cd_32x32(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - const double _Complex 
iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_32x32(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - const float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_32x32(int v, int *r_ptr, int *tvals, uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_32x32(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_l_32x32(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_32x32(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} - -_EXT_ATTR -__kmpc_xteamr_d_16x32(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_f_16x32(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_cd_16x32(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - const double _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_16x32(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - const float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_16x32(int v, int *r_ptr, int *tvals, 
uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_16x32(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_l_16x32(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_16x32(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); -} - -_EXT_ATTR -__kmpc_xteamr_d_8x32(double v, double *r_ptr, double *tvals, uint32_t *td_ptr, - void (*_rf)(double *, double), - void (*_rf_lds)(__XTEAM_SHARED_LDS double *, - __XTEAM_SHARED_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_f_8x32(float v, float *r_ptr, float *tvals, uint32_t *td_ptr, - void (*_rf)(float *, float), - void (*_rf_lds)(__XTEAM_SHARED_LDS float *, - __XTEAM_SHARED_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_cd_8x32(double _Complex v, double _Complex *r_ptr, - double _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(double _Complex *, double _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS double _Complex *, - __XTEAM_SHARED_LDS double _Complex *), - const double _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, - _rf_lds, iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_cf_8x32(float _Complex v, float _Complex *r_ptr, - float _Complex *tvals, uint32_t *td_ptr, - void (*_rf)(float _Complex *, float _Complex), - void (*_rf_lds)(__XTEAM_SHARED_LDS float _Complex *, - __XTEAM_SHARED_LDS float _Complex *), - const float _Complex iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, - iv, k, numteams); -} -_EXT_ATTR -__kmpc_xteamr_i_8x32(int v, int *r_ptr, int *tvals, uint32_t *td_ptr, - void (*_rf)(int *, int), - void (*_rf_lds)(__XTEAM_SHARED_LDS int *, - __XTEAM_SHARED_LDS int *), - const int iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ui_8x32(uint32_t v, uint32_t *r_ptr, uint32_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint32_t *, uint32_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint32_t *, - __XTEAM_SHARED_LDS uint32_t *), - const uint32_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); 
-} -_EXT_ATTR -__kmpc_xteamr_l_8x32(long v, long *r_ptr, long *tvals, uint32_t *td_ptr, - void (*_rf)(long *, long), - void (*_rf_lds)(__XTEAM_SHARED_LDS long *, - __XTEAM_SHARED_LDS long *), - const long iv, const uint64_t k, const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, k, - numteams); -} -_EXT_ATTR -__kmpc_xteamr_ul_8x32(uint64_t v, uint64_t *r_ptr, uint64_t *tvals, - uint32_t *td_ptr, void (*_rf)(uint64_t *, uint64_t), - void (*_rf_lds)(__XTEAM_SHARED_LDS uint64_t *, - __XTEAM_SHARED_LDS uint64_t *), - const uint64_t iv, const uint64_t k, - const uint32_t numteams) { - _xteam_reduction(v, r_ptr, tvals, td_ptr, _rf, _rf_lds, iv, - k, numteams); +__kmpc_xteamr_f_8x32(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); } +_EXT_ATTR +__kmpc_iteamr_f_8x32(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_8x32(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 8, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_8x32(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 8, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_8x32(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 8, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_8x32(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 8, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_8x32(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_8x32(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_8x32(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 8, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_8x32(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 8, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_8x32(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_8x32(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, 
r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_8x32(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 8, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_8x32(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 8, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_4x32(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_4x32(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_4x32(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_4x32(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_4x32(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 4, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_4x32(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 4, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_4x32(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 4, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_4x32(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 4, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_4x32(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_4x32(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_4x32(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 4, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_4x32(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 4, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_4x32(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void 
(*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_4x32(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_4x32(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 4, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_4x32(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 4, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_d_2x32(double v, double *r_p, double *tvs, uint32_t *td, + void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_d_2x32(double v, double *r_p, void (*rf)(double *, double), + void (*rflds)(_LDS double *, _LDS double *), + const double rnv, const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_f_2x32(float v, float *r_p, float *tvs, uint32_t *td, + void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_f_2x32(float v, float *r_p, void (*rf)(float *, float), + void (*rflds)(_LDS float *, _LDS float *), const float rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cd_2x32(_CD v, _CD *r_p, _CD *tvs, uint32_t *td, + void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CD, 2, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cd_2x32(_CD v, _CD *r_p, void (*rf)(_CD *, _CD), + void (*rflds)(_LDS _CD *, _LDS _CD *), const _CD rnv, + const uint64_t k) { + _iteam_reduction<_CD, 2, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_cf_2x32(_CF v, _CF *r_p, _CF *tvs, uint32_t *td, + void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_CF, 2, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_cf_2x32(_CF v, _CF *r_p, void (*rf)(_CF *, _CF), + void (*rflds)(_LDS _CF *, _LDS _CF *), const _CF rnv, + const uint64_t k) { + _iteam_reduction<_CF, 2, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_i_2x32(int v, int *r_p, int *tvs, uint32_t *td, + void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_i_2x32(int v, int *r_p, void (*rf)(int *, int), + void (*rflds)(_LDS int *, _LDS int *), const int rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ui_2x32(_UI v, _UI *r_p, _UI *tvs, uint32_t *td, + void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UI, 2, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, 
nt); +} +_EXT_ATTR +__kmpc_iteamr_ui_2x32(_UI v, _UI *r_p, void (*rf)(_UI *, _UI), + void (*rflds)(_LDS _UI *, _LDS _UI *), const _UI rnv, + const uint64_t k) { + _iteam_reduction<_UI, 2, 32>(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_l_2x32(long v, long *r_p, long *tvs, uint32_t *td, + void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_l_2x32(long v, long *r_p, void (*rf)(long *, long), + void (*rflds)(_LDS long *, _LDS long *), const long rnv, + const uint64_t k) { + _iteam_reduction(v, r_p, rf, rflds, rnv, k); +} +_EXT_ATTR +__kmpc_xteamr_ul_2x32(_UL v, _UL *r_p, _UL *tvs, uint32_t *td, + void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k, const uint32_t nt) { + _xteam_reduction<_UL, 2, 32>(v, r_p, tvs, td, rf, rflds, rnv, k, nt); +} +_EXT_ATTR +__kmpc_iteamr_ul_2x32(_UL v, _UL *r_p, void (*rf)(_UL *, _UL), + void (*rflds)(_LDS _UL *, _LDS _UL *), const _UL rnv, + const uint64_t k) { + _iteam_reduction<_UL, 2, 32>(v, r_p, rf, rflds, rnv, k); +} +#undef _CD +#undef _CF +#undef _UI +#undef _UL +#undef _LDS // Built-in pair reduction functions used as function pointers for // cross team reduction functions. diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt new file mode 100644 index 0000000000000..70eef5841a2a5 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -0,0 +1,274 @@ +##===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## +# +# Build the AMDGCN Device RTL bitcode library using clang -ffreestanding +# +##===----------------------------------------------------------------------===## + +if ( WIN32 ) + libomptarget_say("Not buildingAMDGCN deviceRTL on windows") + return() +endif() +find_package(AMDDeviceLibs REQUIRED CONFIG PATHS + ${CMAKE_BINARY_DIR}/../../tools/ROCMDEVLIBS + ${CMAKE_INSTALL_PREFIX} + /opt/rocm) +if(AMDDeviceLibs_DIR) + libomptarget_say("Getting ROCm device libs from ${AMDDeviceLibs_DIR}") +else() + libomptarget_say("Not building AMDGCN device RTL: Could not find AMDDeviceLibs package") + return() +endif() + +set(LIBOMPTARGET_BUILD_AMDGCN_BCLIB TRUE CACHE BOOL + "Can be set to false to disable building this library.") + +if (NOT LIBOMPTARGET_BUILD_AMDGCN_BCLIB) + libomptarget_say("Not building AMDGCN device RTL: Disabled by LIBOMPTARGET_BUILD_AMDGCN_BCLIB") + return() +endif() + +if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) + libomptarget_say("Not building AMDGCN device RTL: Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS") + return() +endif() + +if (LLVM_DIR) + libomptarget_say("Building AMDGCN device RTLs. Using clang: ${CLANG_TOOL}") +elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD) + # LLVM in-tree builds may use CMake target names to discover the tools. + libomptarget_say("Building AMDGCN device RTL. Using clang from in-tree build: ${CLANG_TOOL}") +else() + libomptarget_say("Not building AMDGCN device RTL. 
No appropriate clang found") + return() +endif() + +if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") + set(aux_triple x86_64-unknown-linux-gnu) +elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "ppc64le") + set(aux_triple powerpc64le-unknown-linux-gnu) +elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") + set(aux_triple aarch64-unknown-linux-gnu) +else() + libomptarget_say("Not building AMDGCN device RTL: unknown host arch: ${CMAKE_HOST_SYSTEM_PROCESSOR}") + return() +endif() + +if (LLVM_DIR) + # Builds that use pre-installed LLVM have LLVM_DIR set. + find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL)) + libomptarget_say("Not building AMDGCN device RTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} or opt: ${OPT_TOOL}") + return() + else() + libomptarget_say("Building AMDGCN device RTL. Using clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} and opt: ${OPT_TOOL}") + endif() +elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD) + # LLVM in-tree builds may use CMake target names to discover the tools. + set(CLANG_TOOL $) + set(LINK_TOOL $) + set(OPT_TOOL $) + libomptarget_say("Building AMDGCN device RTL. Using clang from in-tree build") +else() + libomptarget_say("Not building AMDGCN device RTL. No appropriate clang found") + return() +endif() + +project(omptarget-amdgcn) + +add_custom_target(omptarget-amdgcn ALL) + +#optimization level +set(optimization_level 2) +set(clang_opt_flags -O${optimization_level} -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=2048) +set(link_opt_flags -O${optimization_level} -openmp-opt-disable) + +# Activate RTL message dumps if requested by the user. 
+if(LIBOMPTARGET_NVPTX_DEBUG) + set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g) +endif() + +get_filename_component(devicertl_base_directory + ${CMAKE_CURRENT_SOURCE_DIR} + DIRECTORY) + +set(hip_sources + ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_libcall.hip + ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_locks.hip + ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_smid.hip + ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.hip + ${devicertl_base_directory}/common/src/cancel.cu + ${devicertl_base_directory}/common/src/critical.cu + ${devicertl_base_directory}/common/src/data_sharing.cu + ${devicertl_base_directory}/common/src/libcall.cu + ${devicertl_base_directory}/common/src/loop.cu + ${devicertl_base_directory}/common/src/omp_data.cu + ${devicertl_base_directory}/common/src/omptarget.cu + ${devicertl_base_directory}/common/src/parallel.cu + ${devicertl_base_directory}/common/src/reduction.cu + ${devicertl_base_directory}/common/src/support.cu + ${devicertl_base_directory}/common/src/shuffle.cpp + ${devicertl_base_directory}/common/src/sync.cu + ${devicertl_base_directory}/common/src/task.cu + ${devicertl_base_directory}/common/src/ompd-specific.cu) + +set(h_files + ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h + ${devicertl_base_directory}/common/debug.h + ${devicertl_base_directory}/common/omptarget.h + ${devicertl_base_directory}/common/omptargeti.h + ${devicertl_base_directory}/common/state-queue.h + ${devicertl_base_directory}/common/state-queuei.h + ${devicertl_base_directory}/common/support.h) + +# create gfx bitcode libraries +set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103) +if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) + set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) +endif() +set(amdgpu_32bitwf gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103) + +# see if this build is for LLVM_ENABLE_RUNTIMES='openmp' +set(_xdir "") +foreach(proj ${LLVM_ENABLE_RUNTIMES}) + string(TOUPPER "${proj}" canon_name) + if ("${canon_name}" STREQUAL "OPENMP") + set(_xdir "/openmp") + endif() +endforeach() + +# Prepend -I to each list element +set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}") +list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN PREPEND "-I") + +macro(build_bc_library mcpu target_libname) + set(clang_cmd ${CLANG_TOOL} + -xc++ + -c + -std=c++14 + -target amdgcn-amd-amdhsa + -ffreestanding + -emit-llvm + -Xclang -aux-triple -Xclang ${aux_triple} + -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device + -D__AMDGCN__ + -Xclang -target-cpu -Xclang ${mcpu} + -fvisibility=hidden + -Wno-unused-value + -nogpulib + ${clang_opt_flags} + ${CUDA_DEBUG} + -I${CMAKE_CURRENT_SOURCE_DIR}/src + -I${devicertl_base_directory}/common/include + -I${devicertl_base_directory} + -I${devicertl_base_directory}/../include + ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN}) + + set(source_bc_files) + + foreach(file ${hip_sources}) + get_filename_component(fname ${file} NAME_WE) + set(source_bc_file ${fname}.${mcpu}.bc) + + add_custom_command( + OUTPUT ${source_bc_file} + COMMAND ${clang_cmd} ${file} -o ${source_bc_file} + DEPENDS ${file} ${h_files}) + + list(APPEND source_bc_files ${source_bc_file}) + endforeach() + + string(LENGTH "${mcpu}" gfxlen) + if(gfxlen EQUAL 6) + string(SUBSTRING ${mcpu} 3 3 gfxnum) + else() + string(SUBSTRING ${mcpu} 3 4 gfxnum) + endif() + set(libhostrpc-bc 
${CMAKE_BINARY_DIR}/libhostrpc-amdgcn-${mcpu}.bc) + set(libm-bc ${CMAKE_BINARY_DIR}/libm-amdgcn-${mcpu}.bc) + get_target_property(ockl_bc_file ockl LOCATION) + get_target_property(ocml_bc_file ocml LOCATION) + string(FIND "${amdgpu_32bitwf}" "${mcpu}" is_32bit) + if(NOT is_32bit EQUAL -1) + get_target_property(oclc_wf_bc_file oclc_wavefrontsize64_off LOCATION) + else() + get_target_property(oclc_wf_bc_file oclc_wavefrontsize64_on LOCATION) + endif() + get_target_property(oclc_isa_bc_file oclc_isa_version_${gfxnum} LOCATION) + + # Add custom target so targets from other directories + # can be added as dependencies to ensure libm + # and libhostrpc bc files have been built. + add_custom_target(touch-${mcpu} ALL) + add_dependencies(touch-${mcpu} + libm-target-${mcpu} + libhostrpc-target-${mcpu} + ) + + if (EXISTS ${CMAKE_BINARY_DIR}/../../tools/ROCMDEVLIBS) + add_dependencies(touch-${mcpu} + ockl ocml oclc_wavefrontsize64_on oclc_wavefrontsize64_off oclc_isa_version_${gfxnum}) + endif() + + set(linkout_bc_file "linkout.${mcpu}.bc") + set(opt_bc_file opt-amdgcn-${mcpu}.bc) + add_custom_command( + OUTPUT ${linkout_bc_file} + COMMAND ${LINK_TOOL} ${source_bc_files} ${libm-bc} ${libhostrpc-bc} + ${ocml_bc_file} ${ockl_bc_file} ${oclc_wf_bc_file} ${oclc_isa_bc_file} -o ${linkout_bc_file} + DEPENDS ${source_bc_files} touch-${mcpu} ${toolchain_deps}) + add_custom_command( + OUTPUT ${opt_bc_file} + COMMAND ${OPT_TOOL} ${link_opt_flags} ${linkout_bc_file} -o ${opt_bc_file} + DEPENDS ${linkout_bc_file} ${toolchain_deps}) + add_custom_command( + OUTPUT ${target_libname} + COMMAND ${PREP_TOOL} ${opt_bc_file} -o ${target_libname} + DEPENDS ${opt_bc_file} ${toolchain_deps}) + + # Add a file-level dependency to ensure that llvm-link and opt are up-to-date. + # By default, add_custom_command only builds the tool if the executable is missing + if("${LINK_TOOL}" STREQUAL "$") + add_custom_command(OUTPUT ${linkout_bc_file} + DEPENDS llvm-link + APPEND) + endif() + if("${OPT_TOOL}" STREQUAL "$") + add_custom_command(OUTPUT ${opt_bc_file} + DEPENDS opt + APPEND) + endif() + if("${PREP_TOOL}" STREQUAL "$") + add_custom_command(OUTPUT ${target_libname} + DEPENDS prep-libomptarget-bc + APPEND) + endif() + + add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${target_libname}) +endmacro() + +set(toolchain_deps "") +if(TARGET llvm-link) + list(APPEND toolchain_deps llvm-link) +endif() +if(TARGET opt) + list(APPEND toolchain_deps opt) +endif() +if(TARGET prep-libomptarget-bc) + list(APPEND toolchain_deps prep-libomptarget-bc) +endif() + +set(libname "omptarget-amdgcn") +foreach(mcpu ${amdgpu_mcpus}) + set(bc_libname lib${libname}-${mcpu}.bc) + build_bc_library(${mcpu} ${bc_libname}) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bc_libname} DESTINATION "${DEVEL_PACKAGE}${OPENMP_INSTALL_LIBDIR}") +endforeach() diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h new file mode 100644 index 0000000000000..75f5651184272 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -0,0 +1,116 @@ +//===------- target_impl.h - AMDGCN OpenMP GPU implementation ----- HIP -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations and definitions of target specific functions and constants +// +//===----------------------------------------------------------------------===// +#ifndef OMPTARGET_AMDGCN_TARGET_IMPL_H +#define OMPTARGET_AMDGCN_TARGET_IMPL_H + +#ifndef __AMDGCN__ +#error "amdgcn target_impl.h expects to be compiled under __AMDGCN__" +#endif + +#include "interface.h" +#include "amdgcn_interface.h" + +#include +#include + +#ifdef _OPENMP +// subset of inttypes.h +#define PRId64 "ld" +#define PRIu64 "lu" + +#define DEVICE +#else +#define DEVICE __attribute__((device)) +#endif + +typedef uint64_t __kmpc_impl_lanemask_t; + +#define INLINE inline +#define NOINLINE __attribute__((noinline)) +#define ALIGN(N) __attribute__((aligned(N))) +#define PLUGIN_ACCESSIBLE \ + __attribute__((used)) /* Don't discard values the plugin reads */ \ + __attribute__((weak)) /* We may have multiple definitions */ \ + __attribute__((retain)) /* Also needed to keep values alive */ \ + __attribute__((visibility("protected"))) /* Access via SHT_HASH */ \ + __attribute__((section(".bss"))) /* .bss, can write after load */ + +#include "llvm/Frontend/OpenMP/OMPGridValues.h" + +INLINE constexpr const llvm::omp::GV &getGridValue() { + return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Kernel options +//////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// The following def must match the absolute limit hardwired in the host RTL +// max number of threads per team +enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size }; +enum { WARPSIZE = getGridValue().GV_Warp_Size }; + +// Maximum number of omp state objects per SM allocated statically in global +// memory. +#define OMP_STATE_COUNT 32 + +// FIXME: determine correct number of CUs for each amdgpu +#if defined(__gfx900__) +#define MAX_SM 64 +#elif defined(__gfx906__) +#define MAX_SM 64 +#elif defined(__gfx908__) +#define MAX_SM 120 +#elif defined(__gfx90a__) +#define MAX_SM 110 +#elif defined(__gfx90c__) +#define MAX_SM 120 +#elif defined(__gfx1030__) +#define MAX_SM 72 +#elif defined(__gfx1031__) +#define MAX_SM 40 +#elif defined(__gfx1100__) +#define MAX_SM 84 +#elif defined(__gfx1101__) +#define MAX_SM 64 +#elif defined(__gfx1102__) +#define MAX_SM 40 +#else +#define MAX_SM 120 +#endif + +#define OMP_ACTIVE_PARALLEL_LEVEL 128 + +// Data sharing related quantities, need to match what is used in the compiler. +enum DATA_SHARING_SIZES { + // The size reserved for data in a shared memory slot. + DS_Slot_Size = getGridValue().GV_Slot_Size, + // The slot size that should be reserved for a working warp. + DS_Worker_Warp_Slot_Size = getGridValue().warpSlotSize(), + // The maximum number of warps in use + DS_Max_Warp_Number = getGridValue().maxWarpNumber(), +}; + +enum : __kmpc_impl_lanemask_t { + __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0 +}; + +// The return code of printf is not checked in the call sites in this library. +// A call to a function named printf currently hits some special case handling +// for opencl, which translates to calls that do not presently exist for openmp +// Therefore, for now, stub out printf while building this library. 
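A note on the PLUGIN_ACCESSIBLE macro defined in this header: it stacks attributes so a device-image global is neither discarded by the optimizer nor stripped by the linker, and stays visible to the host plugin that inspects the loaded image. The snippet below is a minimal, self-contained sketch of that idea; the attribute set is restated in reduced form and the symbol name is invented for illustration (it assumes a clang recent enough to support __attribute__((retain))).

#include <cstdint>

// Reduced restatement of the attribute stack from target_impl.h above.
#define EXAMPLE_PLUGIN_ACCESSIBLE                                              \
  __attribute__((used))   /* keep it even if nothing in this TU uses it */     \
  __attribute__((retain)) /* survive linker section garbage collection */      \
  __attribute__((visibility("protected")))

// A value a host-side plugin could look up by name in the device image.
EXAMPLE_PLUGIN_ACCESSIBLE uint32_t example_device_environment_flag = 0;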
+EXTERN int printf(const char *, ...); +EXTERN char *global_allocate(uint32_t bufsz); +EXTERN int global_free(void *ptr); + +#endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip new file mode 100644 index 0000000000000..128753ebebfb1 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -0,0 +1,281 @@ +//===------- target_impl.hip - AMDGCN OpenMP GPU implementation --- HIP -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definitions of target specific functions +// +//===----------------------------------------------------------------------===// +#pragma omp declare target + +#include "common/omptarget.h" +#include "target_impl.h" +#include "target_interface.h" + +// Initialized with a 64-bit mask with bits set in positions less than the +// thread's lane number in the warp +EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { + uint32_t lane = GetLaneId(); + int64_t ballot = __kmpc_impl_activemask(); + uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1; + return mask & ballot; +} + +// Initialized with a 64-bit mask with bits set in positions greater than the +// thread's lane number in the warp +EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() { + uint32_t lane = GetLaneId(); + if (lane == (WARPSIZE - 1)) + return 0; + uint64_t ballot = __kmpc_impl_activemask(); + uint64_t mask = (~((uint64_t)0)) << (lane + 1); + return mask & ballot; +} + +EXTERN double __kmpc_impl_get_wtick() { return ((double)1E-9); } + +EXTERN double __kmpc_impl_get_wtime() { +#if __gfx700__ || __gfx701__ || __gfx702__ + uint64_t t = __builtin_amdgcn_s_memtime(); +#elif __gfx1100__ || __gfx1101__ || __gfx1102__ || __gfx1103__ + uint64_t t = __builtin_readcyclecounter(); +#else + uint64_t t = __builtin_amdgcn_s_memrealtime(); +#endif + return ((double)1.0 / 745000000.0) * t; +} + +// Warp vote function +EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() { + return __builtin_amdgcn_read_exec(); +} + +EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var, + uint32_t laneDelta, int32_t width) { + int self = GetLaneId(); + int index = self + laneDelta; + index = (int)(laneDelta + (self & (width - 1))) >= width ? self : index; + return __builtin_amdgcn_ds_bpermute(index << 2, var); +} + +// Use of these will hand smoke reduction_teams test +uint32_t __kmpc_L1_Barrier [[clang::loader_uninitialized]]; +#pragma allocate(__kmpc_L1_Barrier) allocator(omp_pteam_mem_alloc) + +// static doesn't work for openmp + shared, the variable is discarded by llc +// and lld then fails to link. Unclear why the variable hasn't been associated +// with the kernel. Dropping the static qualifier for now. 
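The partial barrier implemented in __kmpc_impl_named_sync just below packs two 16-bit counters into one atomically updated 32-bit word: the low half counts waves that have arrived (incremented once per wave, by its lowest active lane), and the high half counts how many times the barrier has been passed, the generation, which late arrivals spin on until it changes. The following is a hedged host-side analogue of that counting scheme, using std::thread and std::atomic as stand-ins; it sketches the logic only and is not the device code, which additionally brackets the barrier with acquire/release fences.

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

// Low 16 bits: arrivals in the current round. High 16 bits: generation.
static std::atomic<uint32_t> barrier_word{0};

void arrive_and_wait(uint32_t num_participants) {
  uint32_t load = barrier_word.fetch_add(1, std::memory_order_relaxed);
  uint32_t generation = load & 0xffff0000u;
  if ((load & 0x0000ffffu) == num_participants - 1) {
    // Last arrival: clear the low half and bump the generation, which
    // releases everyone spinning on the old generation value.
    load += 0x00010000u; // wrap of the high half is harmless
    load &= 0xffff0000u; // zero the arrival count for the next round
    barrier_word.store(load, std::memory_order_relaxed);
  } else {
    // Not last: spin until the generation changes.
    while ((barrier_word.load(std::memory_order_relaxed) & 0xffff0000u) ==
           generation)
      std::this_thread::yield();
  }
}

int main() {
  const uint32_t n = 4;
  std::vector<std::thread> threads;
  for (uint32_t i = 0; i < n; ++i)
    threads.emplace_back([&] {
      arrive_and_wait(n);
      std::printf("passed the barrier\n");
    });
  for (auto &t : threads)
    t.join();
}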
+ +// static +EXTERN uint32_t SHARED(L1_Barrier); + +EXTERN void __kmpc_impl_target_init() { + // Don't have global ctors, and shared memory is not zero init + __atomic_store_n(&L1_Barrier, 0u, __ATOMIC_RELEASE); +} + +#ifdef FIXME_BREAKS // reduction_teams + uint32_t __kmpc_L0_Barrier [[clang::loader_uninitialized]]; + #pragma allocate(__kmpc_L0_Barrier) allocator(omp_pteam_mem_alloc) + + EXTERN void __kmpc_impl_target_init() { + // Don't have global ctors, and shared memory is not zero init + __atomic_store_n(&__kmpc_L0_Barrier, 0u, __ATOMIC_RELEASE); + } + + EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) { + pteam_mem_barrier(num_threads, &__kmpc_L0_Barrier); +#endif + +EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) { + __atomic_thread_fence(__ATOMIC_ACQUIRE); + + uint32_t num_waves = (num_threads + WARPSIZE - 1) / WARPSIZE; + + // Partial barrier implementation for amdgcn. + // Uses two 16 bit unsigned counters. One for the number of waves to have + // reached the barrier, and one to count how many times the barrier has been + // passed. These are packed in a single atomically accessed 32 bit integer. + // Low bits for the number of waves, assumed zero before this call. + // High bits to count the number of times the barrier has been passed. + + if (num_waves == 0) + __builtin_trap(); + if (num_waves * WARPSIZE != num_threads) + __builtin_trap(); + //if (num_waves >= 0xffffu) + // __builtin_trap(); + + // Increment the low 16 bits once, using the lowest active thread. + uint64_t lowestActiveThread = __kmpc_impl_ffs(__kmpc_impl_activemask()) - 1; + bool isLowest = GetLaneId() == lowestActiveThread; + + if (isLowest) { + uint32_t load = __atomic_fetch_add(&L1_Barrier, 1, + __ATOMIC_RELAXED); // commutative + + // Record the number of times the barrier has been passed + uint32_t generation = load & 0xffff0000u; + + if ((load & 0x0000ffffu) == (num_waves - 1)) { + // Reached num_waves in low bits so this is the last wave. + // Set low bits to zero and increment high bits + load += 0x00010000u; // wrap is safe + load &= 0xffff0000u; // because bits zeroed second + + // Reset the wave counter and release the waiting waves + __atomic_store_n(&L1_Barrier, load, __ATOMIC_RELAXED); + } else { + // more waves still to go, spin until generation counter changes + do { + __builtin_amdgcn_s_sleep(0); + load = __atomic_load_n(&L1_Barrier, __ATOMIC_RELAXED); + } while ((load & 0xffff0000u) == generation); + } + } + __atomic_thread_fence(__ATOMIC_RELEASE); +} + + +namespace { +uint32_t get_grid_dim(uint32_t n, uint16_t d) { + uint32_t q = n / d; + return q + (n > q * d); +} +uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size, + uint16_t group_size) { + uint32_t r = grid_size - group_id * group_size; + return (r < group_size) ? 
r : group_size; +} +} // namespace + +EXTERN int __kmpc_get_hardware_num_blocks() { + return get_grid_dim(__builtin_amdgcn_grid_size_x(), + __builtin_amdgcn_workgroup_size_x()); +} + +EXTERN int __kmpc_get_hardware_num_threads_in_block() { + return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), + __builtin_amdgcn_grid_size_x(), + __builtin_amdgcn_workgroup_size_x()); +} + +EXTERN unsigned __kmpc_get_warp_size() { + return WARPSIZE; +} + +EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; } +EXTERN unsigned GetLaneId() { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { + return __kmpc_get_hardware_num_threads_in_block(); +} + +// global_allocate uses ockl_dm_alloc to manage a global memory heap +extern "C" uint64_t __ockl_dm_alloc(uint64_t bufsz); +extern "C" void __ockl_dm_dealloc(uint64_t ptr); +EXTERN char * global_allocate(uint32_t bufsz) { + uint64_t ptr = __ockl_dm_alloc((uint64_t) bufsz); + return (char*) ptr; +} +EXTERN int global_free(void * ptr) { + __ockl_dm_dealloc((uint64_t) ptr); + return 0; +} + +// Memory +EXTERN void *__kmpc_impl_malloc(size_t t) { return global_allocate(t); } +EXTERN void __kmpc_impl_free(void * ptr) {global_free(ptr);} + +// Atomics +uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) { + return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST); +} +uint32_t __kmpc_atomic_inc(uint32_t *Address, uint32_t Val) { + return __builtin_amdgcn_atomic_inc32(Address, Val, __ATOMIC_SEQ_CST, ""); +} +uint32_t __kmpc_atomic_max(uint32_t *Address, uint32_t Val) { + return __atomic_fetch_max(Address, Val, __ATOMIC_SEQ_CST); +} + +uint32_t __kmpc_atomic_exchange(uint32_t *Address, uint32_t Val) { + uint32_t R; + __atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST); + return R; +} +uint32_t __kmpc_atomic_cas(uint32_t *Address, uint32_t Compare, uint32_t Val) { + (void)__atomic_compare_exchange(Address, &Compare, &Val, false, + __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return Compare; +} + +unsigned long long __kmpc_atomic_exchange(unsigned long long *Address, + unsigned long long Val) { + unsigned long long R; + __atomic_exchange(Address, &Val, &R, __ATOMIC_SEQ_CST); + return R; +} +unsigned long long __kmpc_atomic_add(unsigned long long *Address, + unsigned long long Val) { + return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST); +} + +EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) { + lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF)); + hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32); +} + +EXTERN uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { + return (((uint64_t)hi) << 32) | (uint64_t)lo; +} + +EXTERN void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); } + +EXTERN void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) { + // AMDGCN doesn't need to sync threads in a warp +} + +EXTERN void __kmpc_impl_threadfence() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); +} + +EXTERN void __kmpc_impl_threadfence_block() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); +} + +EXTERN void __kmpc_impl_threadfence_system() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); +} + +// Calls to the AMDGCN layer (assuming 1D layout) +EXTERN int __kmpc_get_hardware_thread_id_in_block() { return __builtin_amdgcn_workitem_id_x(); } +EXTERN int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); } + +#if defined(__gfx90a__) && \ + __has_builtin(__builtin_amdgcn_is_shared) 
&& \ + __has_builtin(__builtin_amdgcn_is_private) && \ + __has_builtin(__builtin_amdgcn_ds_atomic_fadd_f32) && \ + __has_builtin(__builtin_amdgcn_global_atomic_fadd_f32) +// This function is called for gfx90a only and single precision +// floating point type +EXTERN float __kmpc_unsafeAtomicAdd(float* addr, float value) { + if (__builtin_amdgcn_is_shared( + (const __attribute__((address_space(0))) void*)addr)) + return __builtin_amdgcn_ds_atomic_fadd_f32(( + const __attribute__((address_space(3))) float*)addr, value); + else if (__builtin_amdgcn_is_private( + (const __attribute__((address_space(0))) void*)addr)) { + float temp = *addr; + *addr = temp + value; + return temp; + } + return __builtin_amdgcn_global_atomic_fadd_f32( + (const __attribute__((address_space(1))) float*)addr, value); +} +#endif // if defined(gfx90a) && +#pragma omp end declare target diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index 8b33f8cf76491..f6142f8a763bc 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -519,6 +519,31 @@ struct PluginManager { /// Flag to indicate if we use events to ensure the atomicity of /// map clauses or not. Can be modified with an environment variable. const bool UseEventsForAtomicTransfers; + + // Work around for plugins that call dlopen on shared libraries that call + // tgt_register_lib during their initialisation. Stash the pointers in a + // vector until the plugins are all initialised and then register them. + bool maybeDelayRegisterLib(__tgt_bin_desc *Desc) { + if (!RTLsLoaded) { + // Only reachable from libomptarget constructor + DelayedBinDesc.push_back(Desc); + return true; + } else { + return false; + } + } + + void registerDelayedLibraries() { + // Only called by libomptarget constructor + RTLsLoaded = true; + for (auto *Desc : DelayedBinDesc) + __tgt_register_lib(Desc); + DelayedBinDesc.clear(); + } + +private: + bool RTLsLoaded = false; + llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc; }; extern PluginManager *PM; diff --git a/openmp/libomptarget/include/ompt-connector.h b/openmp/libomptarget/include/ompt-connector.h index b1a0ca410bbce..81537a6a87faf 100644 --- a/openmp/libomptarget/include/ompt-connector.h +++ b/openmp/libomptarget/include/ompt-connector.h @@ -18,8 +18,9 @@ //**************************************************************************** // global includes //**************************************************************************** +#include "llvm/Support/DynamicLibrary.h" -#include +#include #include //**************************************************************************** @@ -57,38 +58,52 @@ typedef void (*library_ompt_connect_t)(ompt_start_tool_result_t *result); class library_ompt_connector_t { public: - void connect(ompt_start_tool_result_t *ompt_result) { - initialize(); - if (library_ompt_connect) { - library_ompt_connect(ompt_result); - } - }; - - library_ompt_connector_t(const char *library_name) { - library_connect_routine.append(library_name); - library_connect_routine.append("_ompt_connect"); + library_ompt_connector_t(const char *ident) { + lib_ident.append(ident); is_initialized = false; - }; + } library_ompt_connector_t() = delete; + void connect(ompt_start_tool_result_t *ompt_result) { + initialize(); + if (!library_ompt_connect) + return; + library_ompt_connect(ompt_result); + } + private: void initialize() { - if (is_initialized == false) { - DP("OMPT: library_ompt_connect = %s\n", library_connect_routine.c_str()); - void *vptr = 
dlsym(NULL, library_connect_routine.c_str()); - // If dlsym fails, library_ompt_connect will be null. connect() checks - // for this condition + if (is_initialized) + return; + + std::string err_msg; + std::string lib_name = lib_ident; + lib_name += ".so"; + + DP("OMPT: Trying to load library %s\n", lib_name.c_str()); + auto dyn_lib_handle = std::make_shared( + llvm::sys::DynamicLibrary::getPermanentLibrary(lib_name.c_str(), + &err_msg)); + if (!dyn_lib_handle->isValid()) { + // The upper layer will bail out if the handle is null. + library_ompt_connect = nullptr; + } else { + auto lib_conn_rtn = lib_ident + "_ompt_connect"; + DP("OMPT: Trying to get address of connection routine %s\n", + lib_conn_rtn.c_str()); library_ompt_connect = reinterpret_cast( - reinterpret_cast(vptr)); - DP("OMPT: library_ompt_connect = %p\n", library_ompt_connect); - is_initialized = true; + dyn_lib_handle->getAddressOfSymbol(lib_conn_rtn.c_str())); } - }; + DP("OMPT: Library connection handle = %p\n", library_ompt_connect); + is_initialized = true; + } private: + /// Ensure initialization occurs only once bool is_initialized; + /// Handle of connect routine provided by source library library_ompt_connect_t library_ompt_connect; - std::string library_connect_routine; + std::string lib_ident; }; #endif diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h index 49e3d5b2997a9..81c57ef531580 100644 --- a/openmp/libomptarget/include/ompt_device_callbacks.h +++ b/openmp/libomptarget/include/ompt_device_callbacks.h @@ -15,6 +15,8 @@ #ifndef _OMPT_DEVICE_CALLBACKS_H #define _OMPT_DEVICE_CALLBACKS_H +#include "llvm/Support/DynamicLibrary.h" + //**************************************************************************** // local includes //**************************************************************************** @@ -239,6 +241,7 @@ class ompt_device_callbacks_t { enabled = false; tracing_enabled = false; tracing_type_enabled = 0; + parent_dyn_lib = nullptr; #define init_name(name, type, code) name##_fn = 0; FOREACH_OMPT_TARGET_CALLBACK(init_name) @@ -316,6 +319,18 @@ class ompt_device_callbacks_t { } } + void compute_parent_dyn_lib(const char *lib_name) { + if (parent_dyn_lib) + return; + std::string err_msg; + parent_dyn_lib = std::make_shared( + llvm::sys::DynamicLibrary::getPermanentLibrary(lib_name, &err_msg)); + } + + std::shared_ptr get_parent_dyn_lib() { + return parent_dyn_lib; + } + void prepare_devices(int number_of_devices) { resize(number_of_devices); }; void register_callbacks(ompt_function_lookup_t lookup) { @@ -352,6 +367,7 @@ class ompt_device_callbacks_t { bool enabled; std::atomic tracing_enabled; std::atomic tracing_type_enabled; + std::shared_ptr parent_dyn_lib; #define declare_name(name, type, code) name##_t name##_fn; FOREACH_OMPT_TARGET_CALLBACK(declare_name) diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h index 565605992966a..f2f69bdd09e49 100644 --- a/openmp/libomptarget/include/rtl.h +++ b/openmp/libomptarget/include/rtl.h @@ -186,10 +186,8 @@ struct RTLsTy { // Unregister a shared library from all RTLs. void unregisterLib(__tgt_bin_desc *Desc); - // Mutex-like object to guarantee thread-safety and unique initialization - // (i.e. the library attempts to load the RTLs (plugins) only once). - std::once_flag InitFlag; - void loadRTLs(); // not thread-safe + // not thread-safe, called from global constructor (i.e. 
once) + void loadRTLs(); std::vector archsSupportingManagedMemory = { "gfx908", "gfx90a", "sm_35", "sm_50", "sm_60", "sm_70", "sm_61"}; diff --git a/openmp/libomptarget/libm/src/libm.c b/openmp/libomptarget/libm/src/libm.c index 2699fa4aacf7a..df08cb5cab878 100644 --- a/openmp/libomptarget/libm/src/libm.c +++ b/openmp/libomptarget/libm/src/libm.c @@ -21,6 +21,54 @@ #define __OPENMP_AMDGCN__ #include <__clang_cuda_complex_builtins.h> #include <__clang_hip_math.h> + +#ifndef FORTRAN_NO_LONGER_NEEDS +// Attach Fortran runtimes which are used by Classic Flang +double __f90_dmodulov(double a, double p) { + double d; + d = fmod(a, p); + if (d != 0 && ((a < 0 && p > 0) || (a > 0 && p < 0))) + d += p; + return d; +} + +float __f90_amodulov(float a, float p) { return __f90_dmodulov(a, p); } + +int32_t __f90_modulov(int32_t a, int32_t p) { + int32_t q, r; + + q = a / p; + r = a - q * p; + if (r != 0 && (a ^ p) < 0) { /* signs differ */ + r += p; + } + return r; +} + +int64_t __f90_i8modulov_i8(int64_t a, int64_t p) { + int64_t q, r; + + q = a / p; + r = a - q * p; + if (r != 0 && (a ^ p) < 0) { /* signs differ */ + r += (p); + } + return r; +} + +int16_t __f90_imodulov(int16_t a, int16_t p) { + int32_t q, r; + + q = a / p; + r = a - q * p; + if (r != 0 && (a ^ p) < 0) { /* signs differ */ + r += p; + } + return r; +} + +#endif + #pragma omp end declare target #endif diff --git a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp index 178a61206dc32..3c2cb04af41b5 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp @@ -121,8 +121,9 @@ static hsa_status_t locking_async_memcpy(enum CopyDirection direction, if (err != HSA_STATUS_SUCCESS) return err; *user_locked = false; + hsa_agent_t agents[1] = {agent}; err = - hsa_amd_memory_lock(lockingPtr, size, nullptr, 0, (void **)&lockedPtr); + hsa_amd_memory_lock(lockingPtr, size, agents, 1, (void **)&lockedPtr); if (err != HSA_STATUS_SUCCESS) return err; DP("locking_async_memcpy: lockingPtr=%p lockedPtr=%p Size = %lu\n", diff --git a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp index 3985e700d990d..62798baf289a7 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp @@ -17,8 +17,6 @@ #include #include #include - -#include #include //**************************************************************************** @@ -40,7 +38,8 @@ #define FOREACH_TARGET_FN(macro) \ macro(ompt_set_trace_ompt) macro(ompt_start_trace) macro(ompt_flush_trace) \ macro(ompt_stop_trace) macro(ompt_advance_buffer_cursor) \ - macro(ompt_get_record_ompt) + macro(ompt_get_record_ompt) macro(ompt_get_device_time) \ + macro(ompt_get_record_type) #define fnptr_to_ptr(x) ((void *)(uint64_t)x) @@ -58,6 +57,7 @@ static std::mutex start_trace_mutex; static std::mutex flush_trace_mutex; static std::mutex stop_trace_mutex; static std::mutex advance_buffer_cursor_mutex; +static std::mutex get_record_type_mutex; //**************************************************************************** // global data @@ -74,6 +74,10 @@ typedef int (*libomptarget_ompt_stop_trace_t)(ompt_device_t *); typedef int (*libomptarget_ompt_advance_buffer_cursor_t)( ompt_device_t *, ompt_buffer_t *, size_t, ompt_buffer_cursor_t, ompt_buffer_cursor_t *); +typedef ompt_device_time_t (*libomptarget_ompt_get_device_time_t)( + ompt_device_t *); +typedef ompt_record_t 
(*libomptarget_ompt_get_record_type_t)( + ompt_buffer_t *, ompt_buffer_cursor_t); libomptarget_ompt_set_trace_ompt_t ompt_set_trace_ompt_fn = nullptr; libomptarget_ompt_start_trace_t ompt_start_trace_fn = nullptr; @@ -81,10 +85,14 @@ libomptarget_ompt_flush_trace_t ompt_flush_trace_fn = nullptr; libomptarget_ompt_stop_trace_t ompt_stop_trace_fn = nullptr; libomptarget_ompt_advance_buffer_cursor_t ompt_advance_buffer_cursor_fn = nullptr; +libomptarget_ompt_get_record_type_t ompt_get_record_type_fn = nullptr; /// Global function to enable/disable queue profiling for all devices extern void ompt_enable_queue_profiling(int enable); +// These are the implementations in the device plugin/RTL +extern ompt_device_time_t devrtl_ompt_get_device_time(ompt_device_t *device); + // Runtime entry-points for device tracing OMPT_API_ROUTINE ompt_set_result_t ompt_set_trace_ompt(ompt_device_t *device, @@ -101,10 +109,14 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_trace_ompt(ompt_device_t *device, ompt_device_callbacks.set_trace_ompt(device, enable, etype); // libomptarget specific if (!ompt_set_trace_ompt_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_set_trace_ompt"); - assert(vptr && "OMPT set trace ompt entry point not found"); - ompt_set_trace_ompt_fn = - reinterpret_cast(vptr); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_set_trace_ompt"); + assert(vptr && "OMPT set trace ompt entry point not found"); + ompt_set_trace_ompt_fn = + reinterpret_cast(vptr); + } } } return ompt_set_trace_ompt_fn(device, enable, etype); @@ -137,10 +149,14 @@ ompt_start_trace(ompt_device_t *device, ompt_callback_buffer_request_t request, // libomptarget specific if (!ompt_start_trace_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_start_trace"); - assert(vptr && "OMPT start trace entry point not found"); - ompt_start_trace_fn = - reinterpret_cast(vptr); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_start_trace"); + assert(vptr && "OMPT start trace entry point not found"); + ompt_start_trace_fn = + reinterpret_cast(vptr); + } } } return ompt_start_trace_fn(request, complete); @@ -155,10 +171,14 @@ OMPT_API_ROUTINE int ompt_flush_trace(ompt_device_t *device) { // Protect the function pointer std::unique_lock lck(flush_trace_mutex); if (!ompt_flush_trace_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_flush_trace"); - assert(vptr && "OMPT flush trace entry point not found"); - ompt_flush_trace_fn = - reinterpret_cast(vptr); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_flush_trace"); + assert(vptr && "OMPT flush trace entry point not found"); + ompt_flush_trace_fn = + reinterpret_cast(vptr); + } } } return ompt_flush_trace_fn(device); @@ -182,10 +202,14 @@ OMPT_API_ROUTINE int ompt_stop_trace(ompt_device_t *device) { ompt_enable_queue_profiling(false /* enable */); if (!ompt_stop_trace_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_stop_trace"); - assert(vptr && "OMPT stop trace entry point not found"); - ompt_stop_trace_fn = - reinterpret_cast(vptr); + auto 
libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_stop_trace"); + assert(vptr && "OMPT stop trace entry point not found"); + ompt_stop_trace_fn = + reinterpret_cast(vptr); + } } } return ompt_stop_trace_fn(device); @@ -217,15 +241,43 @@ ompt_advance_buffer_cursor(ompt_device_t *device, ompt_buffer_t *buffer, { std::unique_lock lck(advance_buffer_cursor_mutex); if (!ompt_advance_buffer_cursor_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_advance_buffer_cursor"); - assert(vptr && "OMPT advance buffer cursor entry point not found"); - ompt_advance_buffer_cursor_fn = - reinterpret_cast(vptr); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_advance_buffer_cursor"); + assert(vptr && "OMPT advance buffer cursor entry point not found"); + ompt_advance_buffer_cursor_fn = + reinterpret_cast(vptr); + } } } return ompt_advance_buffer_cursor_fn(device, buffer, size, current, next); } +OMPT_API_ROUTINE ompt_record_t +ompt_get_record_type(ompt_buffer_t *buffer, ompt_buffer_cursor_t current) { + { + std::unique_lock lck(get_record_type_mutex); + if (!ompt_get_record_type_fn) { + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_get_record_type"); + assert(vptr && "OMPT get record type entry point not found"); + ompt_get_record_type_fn = + reinterpret_cast(vptr); + } + } + } + return ompt_get_record_type_fn(buffer, current); +} + +OMPT_API_ROUTINE ompt_device_time_t +ompt_get_device_time(ompt_device_t *device) { + DP("OMPT: Executing ompt_get_device_time\n"); + return devrtl_ompt_get_device_time(device); +} + // End of runtime entry-points for trace records //**************************************************************************** @@ -301,11 +353,10 @@ __attribute__((constructor)) static void ompt_init(void) { ompt_result.initialize = ompt_device_init; ompt_result.finalize = ompt_device_fini; ompt_result.tool_data.value = 0; - ; ompt_device_callbacks.init(); - libomptarget_connector.connect(&ompt_result); + DP("OMPT: Exiting ompt_init\n"); } #endif diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 8cacf1fae0cd5..befd56617bb66 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -545,7 +544,7 @@ class HSAQueueScheduler { /// Class containing all the device information class RTLDeviceInfoTy : HSALifetime { - enum : uint8_t { NUM_QUEUES_PER_DEVICE = 4 }; + enum : uint8_t { NUM_QUEUES_PER_DEVICE = 1 }; std::vector> FuncGblEntries; struct QueueDeleter { @@ -1378,6 +1377,9 @@ pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; static RTLDeviceInfoTy DeviceInfoState; static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; } +int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; } + /// Global function for enabling/disabling queue profiling, used for OMPT trace /// records. 
void ompt_enable_queue_profiling(int enable) { @@ -1391,8 +1393,13 @@ static void ensureTimestampFn() { std::unique_lock timestamp_fn_lck(ompt_set_timestamp_mtx); if (ompt_set_timestamp_fn) return; - void *vptr = dlsym(NULL, "libomptarget_ompt_set_timestamp"); - assert(vptr && "OMPT set timestamp entry point not found"); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib == nullptr || !libomptarget_dyn_lib->isValid()) + return; + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_set_timestamp"); + if (!vptr) + return; ompt_set_timestamp_fn = reinterpret_cast(vptr); } @@ -1411,8 +1418,10 @@ static void recordCopyTimingInNs(hsa_signal_t signal) { ensureTimestampFn(); // No need to hold a lock // Factor in the frequency - ompt_set_timestamp_fn(time_rec.start * DeviceInfo().TicksToTime, - time_rec.end * DeviceInfo().TicksToTime); + if (ompt_set_timestamp_fn) { + ompt_set_timestamp_fn(time_rec.start * DeviceInfo().TicksToTime, + time_rec.end * DeviceInfo().TicksToTime); + } } /// Get the HSA system timestamps for the input agent and signal associated @@ -1430,8 +1439,10 @@ static void recordKernelTimingInNs(hsa_signal_t signal, hsa_agent_t agent) { ensureTimestampFn(); // No need to hold a lock // Factor in the frequency - ompt_set_timestamp_fn(time_rec.start * DeviceInfo().TicksToTime, - time_rec.end * DeviceInfo().TicksToTime); + if (ompt_set_timestamp_fn) { + ompt_set_timestamp_fn(time_rec.start * DeviceInfo().TicksToTime, + time_rec.end * DeviceInfo().TicksToTime); + } } /// Get the current HSA system timestamp @@ -1456,7 +1467,8 @@ struct OmptTimestampRAII { void setTimestamp() { uint64_t EndTime = getSystemTimestampInNs(); ensureTimestampFn(); - ompt_set_timestamp_fn(StartTime, EndTime); + if (ompt_set_timestamp_fn) + ompt_set_timestamp_fn(StartTime, EndTime); } }; @@ -1517,12 +1529,22 @@ class AMDGPUAsyncInfoDataTy { memcpy(HstPtr, HstOrPoolPtr, Size); } - DeviceInfo().FreeSignalPool.push(signal); alreadyCompleted = true; return err; } hsa_status_t releaseResources() { +#ifdef OMPTARGET_DEBUG + DP("releaseResource for HstPtr %p\t HstOrPoolPtr %p\n", HstPtr, + HstOrPoolPtr); +#endif + OMPT_IF_TRACING_ENABLED(recordCopyTimingInNs(signal);); + + // Free signal once it's no longer in use. + // This *should* always be safe to do at this point as the signal is either + // waited for directly or as part of a kernel launch AND-Barrier cascade. + DeviceInfo().FreeSignalPool.push(signal); + if (userLocked) return HSA_STATUS_SUCCESS; @@ -1923,6 +1945,8 @@ void getLaunchVals(uint16_t &ThreadsPerGroup, int &NumGroups, int WarpSize, if (ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD || ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP || + ExecutionMode == llvm::omp::OMPTgtExecModeFlags:: + OMP_TGT_EXEC_MODE_SPMD_BIG_JUMP_LOOP || ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_XTEAM_RED) { // ConstWGSize is used for communicating any command-line value to @@ -1951,11 +1975,51 @@ void getLaunchVals(uint16_t &ThreadsPerGroup, int &NumGroups, int WarpSize, return; } - // For optimized reduction, we use as many teams as the number of CUs. This - // must be kept in sync with CodeGen and DeviceRTL. + if (ExecutionMode == + llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD_BIG_JUMP_LOOP) { + // Cannot assert a non-zero tripcount. Instead, launch with 1 team + // if the tripcount is indeed zero. 
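+      // Illustrative example (not part of the computation itself): with
+      // LoopTripcount = 10000 and ThreadsPerGroup = 256, the formula below
+      // gives ((10000 - 1) / 256) + 1 = 40 candidate groups, which is then
+      // capped by the num_teams clause or the occupancy heuristic further down.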
+ NumGroups = 1; + if (LoopTripcount > 0) + NumGroups = ((LoopTripcount - 1) / ThreadsPerGroup) + 1; + + // Honor num_teams clause but lower it if tripcount dictates so. + if (NumTeams > 0 && + NumTeams <= static_cast(RTLDeviceInfoTy::HardTeamLimit)) + NumGroups = std::min(NumTeams, NumGroups); + else { + // num_teams clause is not specified. Choose lower of tripcount-based + // num-groups and a value that maximizes occupancy. + int NumWavesInGroup = ThreadsPerGroup / WarpSize; + int MaxOccupancyFactor = NumWavesInGroup ? (32 / NumWavesInGroup) : 32; + NumGroups = std::min(NumGroups, MaxOccupancyFactor * DeviceNumCUs); + } + DP("Final %d NumGroups and %d ThreadsPerGroup\n", NumGroups, + ThreadsPerGroup); + return; + } + if (ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_XTEAM_RED) { - NumGroups = DeviceNumCUs; + // Honor num_teams clause. + if (NumTeams > 0 && + NumTeams <= static_cast(RTLDeviceInfoTy::HardTeamLimit)) + NumGroups = NumTeams; + else { + // If num_teams clause is not specified, we allow a max of 2*CU teams. + if (ThreadsPerGroup > 0) + NumGroups = DeviceNumCUs * std::min(2, 1024 / ThreadsPerGroup); + else + NumGroups = DeviceNumCUs; + // Ensure we don't have a large number of teams running if the tripcount + // is low. + int NumGroupsFromTripCount = 1; + if (LoopTripcount > 0) + NumGroupsFromTripCount = ((LoopTripcount - 1) / ThreadsPerGroup) + 1; + NumGroups = std::min(NumGroups, NumGroupsFromTripCount); + } + // For now, we don't allow number of teams beyond 512. + NumGroups = std::min(512, NumGroups); DP("Final %d NumGroups and %d ThreadsPerGroup\n", NumGroups, ThreadsPerGroup); return; @@ -2116,10 +2180,15 @@ void getLaunchVals(uint16_t &ThreadsPerGroup, int &NumGroups, int WarpSize, { std::unique_lock granted_teams_fn_lck(granted_teams_mtx); if (!ompt_set_granted_teams_fn) { - void *vptr = dlsym(NULL, "libomptarget_ompt_set_granted_teams"); - assert(vptr && "OMPT set granted teams entry point not found"); - ompt_set_granted_teams_fn = - reinterpret_cast(vptr); + auto libomptarget_dyn_lib = ompt_device_callbacks.get_parent_dyn_lib(); + if (libomptarget_dyn_lib != nullptr && + libomptarget_dyn_lib->isValid()) { + void *vptr = libomptarget_dyn_lib->getAddressOfSymbol( + "libomptarget_ompt_set_granted_teams"); + assert(vptr && "OMPT set granted teams entry point not found"); + ompt_set_granted_teams_fn = + reinterpret_cast(vptr); + } } } // No need to hold a lock @@ -2597,14 +2666,7 @@ struct DeviceEnvironment { hsa_signal_t Signal; bool UserLocked; - Err = DeviceInfo().freesignalpoolMemcpyH2D(StatePtr, &HostDeviceEnv, - StatePtrSize, DeviceId, - Signal, UserLocked); - if (Err == HSA_STATUS_ERROR) - return Err; - AMDGPUAsyncInfoDataTy AsyncInfo(Signal, &HostDeviceEnv, &HostDeviceEnv, - StatePtrSize, UserLocked); - Err = AsyncInfo.waitToComplete(/*RetrieveToHost*/ true); + Err = hsa_memory_copy(StatePtr, &HostDeviceEnv, StatePtrSize); return Err; } } @@ -2929,9 +2991,6 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image, return true; } -int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; } -int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; } - int __tgt_rtl_number_of_devices() { // If the construction failed, no methods are safe to call if (DeviceInfo().ConstructionSucceeded) { @@ -3110,6 +3169,7 @@ int32_t __tgt_rtl_init_device(int DeviceId) { OMPT_IF_ENABLED( std::string ompt_gpu_type("AMD "); ompt_gpu_type += GetInfoName; const char *type = ompt_gpu_type.c_str(); + 
ompt_device_callbacks.compute_parent_dyn_lib("libomptarget.so"); ompt_device_callbacks.ompt_callback_device_initialize(DeviceId, type);); return OFFLOAD_SUCCESS; @@ -3314,19 +3374,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId, } // write ptr to device memory so it can be used by later kernels - hsa_signal_t Signal; - bool UserLocked; - Err = DeviceInfo().freesignalpoolMemcpyH2D(StatePtr, &Ptr, sizeof(void *), - DeviceId, Signal, UserLocked); + Err = hsa_memory_copy(StatePtr, &Ptr, sizeof(void *)); if (Err != HSA_STATUS_SUCCESS) { - DP("memcpy install of state_ptr failed\n"); + DP("Error when copying the device state from host to device\n"); return NULL; } - AMDGPUAsyncInfoDataTy AsyncInfo(Signal, &Ptr, &Ptr, sizeof(void *), - UserLocked); - Err = AsyncInfo.waitToComplete(/*RetrieveToHost*/ true); - if (Err != HSA_STATUS_SUCCESS) - return NULL; } } } @@ -3385,17 +3437,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId, // If unified memory is present any target link variables // can access host addresses directly. There is no longer a // need for device copies. - hsa_signal_t Signal; - bool UserLocked; - Err = DeviceInfo().freesignalpoolMemcpyH2D(Varptr, E->addr, - sizeof(void *), DeviceId, Signal, UserLocked); - if (Err != HSA_STATUS_SUCCESS) - DP("Error when copying USM\n"); - - AMDGPUAsyncInfoDataTy AsyncInfo(Signal, E->addr, E->addr, - sizeof(void *), UserLocked); - AsyncInfo.waitToComplete(/*RetrieveToHost*/ true); - + Err = hsa_memory_copy(Varptr, E->addr, sizeof(void *)); + if (Err != HSA_STATUS_SUCCESS) { + DP("Error when copying linked variables in USM mode\n"); + return NULL; + } DP("Copy linked variable host address (" DPxMOD ")" "to device address (" DPxMOD ")\n", DPxPTR(*((void **)E->addr)), DPxPTR(Varptr)); @@ -3638,10 +3684,18 @@ int32_t __tgt_rtl_data_submit(int DeviceId, void *tgt_ptr, void *hst_ptr, if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - AsyncData.waitToComplete(/*RetrieveToHost*/ false); - AsyncData.releaseResources(); + hsa_status_t Err = AsyncData.waitToComplete(/*RetrieveToHost*/ false); + if (Err != HSA_STATUS_SUCCESS) { + DP("Error while submitting data: waiting memory copy to complete\n"); + return OFFLOAD_FAIL; + } + Err = AsyncData.releaseResources(); + if (Err != HSA_STATUS_SUCCESS) { + DP("Error while submitting data: releasing completion signal\n"); + return OFFLOAD_FAIL; + } - return rc; + return OFFLOAD_SUCCESS; } int32_t __tgt_rtl_data_submit_async(int DeviceId, void *TgtPtr, void *HstPtr, @@ -3654,9 +3708,9 @@ int32_t __tgt_rtl_data_submit_async(int DeviceId, void *TgtPtr, void *HstPtr, reinterpret_cast(AsyncInfo->Queue) ->addMapEnteringInfo(std::move(AsyncData)); return rc; - } else { - return __tgt_rtl_data_submit(DeviceId, TgtPtr, HstPtr, Size); } + + // Fall back to synchronous case return __tgt_rtl_data_submit(DeviceId, TgtPtr, HstPtr, Size); } @@ -3668,9 +3722,18 @@ int32_t __tgt_rtl_data_retrieve(int DeviceId, void *hst_ptr, void *tgt_ptr, if (rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - AsyncData.waitToComplete(/*RetrieveToHost*/ true); - AsyncData.releaseResources(); - return rc; + hsa_status_t err = AsyncData.waitToComplete(/*RetrieveToHost*/ true); + if (err != HSA_STATUS_SUCCESS) { + DP("Error while retrieving data: waiting memory copy to complete\n"); + return OFFLOAD_FAIL; + } + err = AsyncData.releaseResources(); + if (err != HSA_STATUS_SUCCESS) { + DP("Error while retrieving data: releasing completion signal\n"); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; } int32_t 
__tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr, @@ -3693,8 +3756,10 @@ int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr, reinterpret_cast(AsyncInfo->Queue) ->addMapExitingInfo(std::move(AsyncData)); return RC; - } else - return __tgt_rtl_data_retrieve(DeviceId, HstPtr, TgtPtr, Size); + } + + // Fall back to synchronous case + return __tgt_rtl_data_retrieve(DeviceId, HstPtr, TgtPtr, Size); } int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr, int32_t) { @@ -3914,3 +3979,11 @@ void __tgt_rtl_print_device_info(int32_t DeviceId) { } } // extern "C" + +///// Target specific OMPT implementations + +// Return the current device time in nanoseconds based on HSA +ompt_device_time_t devrtl_ompt_get_device_time(ompt_device_t *device) { + // TODO: Correctly implement the ompt_device_t *mechanism for device RTL + return getSystemTimestampInNs(); +} diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp index c87ae5458f10a..372ca94128a8f 100644 --- a/openmp/libomptarget/plugins/remote/server/Server.cpp +++ b/openmp/libomptarget/plugins/remote/server/Server.cpp @@ -90,8 +90,6 @@ Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context, Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context, const Null *Null, I32 *NumberOfDevices) { - std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); - int32_t Devices = 0; PM->RTLsMtx.lock(); for (auto &RTL : PM->RTLs.AllRTLs) diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt index 61e5aa06d6e79..42f5d98a2f21a 100644 --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -56,7 +56,7 @@ if (OPENMP_ENABLE_LIBOMPTARGET_PROFILING) target_link_libraries(omptarget PRIVATE LLVMSupport) endif() -find_library(LLVM_OFFLOAD_ARCH LLVMOffloadArch HINTS ${LLVM_LIBRARY_DIR} REQUIRED) +find_library(LLVM_OFFLOAD_ARCH LLVMOffloadArch HINTS ${LLVM_LIBRARY_DIR} ${LLVM_BINARY_DIR} PATH_SUFFIXES lib REQUIRED) target_include_directories(omptarget PRIVATE ${LIBOMPTARGET_INCLUDE_DIR}) diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports index 0e7ee67699ea8..bf1df61b6b5bd 100644 --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -84,7 +84,8 @@ VERS1.0 { libomptarget_ompt_stop_trace; libomptarget_ompt_set_granted_teams; libomptarget_ompt_set_timestamp; - libomptarget_ompt_advance_buffer_cursor; + libomptarget_ompt_advance_buffer_cursor; + libomptarget_ompt_get_record_type; omp_get_interop_ptr; omp_get_interop_str; omp_get_interop_int; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 74f050acacdb5..32d9c17881521 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -49,7 +49,9 @@ EXTERN void __tgt_register_requires(int64_t Flags) { /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { TIMESCOPE(); - std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); + if (PM->maybeDelayRegisterLib(Desc)) + return; + for (auto &RTL : PM->RTLs.AllRTLs) { if (RTL.register_lib) { if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { diff --git a/openmp/libomptarget/src/ompt_callback.cpp b/openmp/libomptarget/src/ompt_callback.cpp index 1f93735998116..fd5f93239f975 100644 --- a/openmp/libomptarget/src/ompt_callback.cpp +++ b/openmp/libomptarget/src/ompt_callback.cpp 
@@ -13,7 +13,6 @@ #include #include #include -#include #include //**************************************************************************** @@ -509,7 +508,7 @@ static void LIBOMPTARGET_GET_TARGET_OPID(uint64_t *device_num, static int libomptarget_ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data) { - DP("enter libomptarget_ompt_initialize!\n"); + DP("OMPT: enter libomptarget_ompt_initialize!\n"); ompt_enabled = true; @@ -525,7 +524,7 @@ static int libomptarget_ompt_initialize(ompt_function_lookup_t lookup, ompt_device_callbacks.register_callbacks(lookup); - DP("exit libomptarget_ompt_initialize!\n"); + DP("OMPT: exit libomptarget_ompt_initialize!\n"); return 0; } @@ -560,7 +559,8 @@ ompt_device_callbacks_t::lookup(const char *interface_function_name) { * constructor *****************************************************************************/ -__attribute__((constructor(102))) static void ompt_init(void) { +void ompt_init() { + DP("OMPT: Entering ompt_init\n"); static library_ompt_connector_t libomp_connector("libomp"); static ompt_start_tool_result_t ompt_result; @@ -569,8 +569,8 @@ __attribute__((constructor(102))) static void ompt_init(void) { ompt_result.tool_data.value = 0; ompt_device_callbacks.init(); - libomp_connector.connect(&ompt_result); + DP("OMPT: Exit ompt_init\n"); } #endif @@ -578,7 +578,8 @@ __attribute__((constructor(102))) static void ompt_init(void) { extern "C" { void libomptarget_ompt_connect(ompt_start_tool_result_t *result) { - DP("OMPT: Enter libomptarget_ompt_connect\n"); + DP("OMPT: Enter libomptarget_ompt_connect: OMPT enabled == %d\n", + ompt_enabled); if (ompt_enabled && result) { libomptarget_rtl_finalizer.register_rtl(result->finalize); result->initialize(ompt_device_callbacks_t::lookup, 0, NULL); @@ -661,4 +662,13 @@ void libomptarget_ompt_set_timestamp(uint64_t start, uint64_t end) { ompt_tr_start_time = start; ompt_tr_end_time = end; } + +// Device-independent entry point to query for the trace format used. +// Currently, only OMPT format is supported. +ompt_record_t libomptarget_ompt_get_record_type(ompt_buffer_t *buffer, + ompt_buffer_cursor_t current) { + // TODO: When different OMPT trace buffer formats supported, this needs to be + // fixed. 
+ return ompt_record_t::ompt_record_ompt; +} } diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index 5ab50fab56ab8..e7d5e877574a9 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -48,6 +48,10 @@ PluginManager *PM; static char *ProfileTraceFile = nullptr; +#ifdef OMPT_SUPPORT +extern void ompt_init(); +#endif + __attribute__((constructor(101))) void init() { DP("Init target library!\n"); @@ -69,6 +73,14 @@ __attribute__((constructor(101))) void init() { // TODO: add a configuration option for time granularity if (ProfileTraceFile) timeTraceProfilerInitialize(500 /* us */, "libomptarget"); + +#ifdef OMPT_SUPPORT + // Initialize OMPT first + ompt_init(); +#endif + + PM->RTLs.loadRTLs(); + PM->registerDelayedLibraries(); } __attribute__((destructor(101))) void deinit() { diff --git a/openmp/libomptarget/test/xteamr/test_xteamr.cpp b/openmp/libomptarget/test/xteamr/test_xteamr.cpp index dab216fd1c5c7..6f344af3c5cc5 100644 --- a/openmp/libomptarget/test/xteamr/test_xteamr.cpp +++ b/openmp/libomptarget/test/xteamr/test_xteamr.cpp @@ -74,8 +74,22 @@ unsigned int ignore_times = #define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_8x32 #define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_4x64 #define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_8x32 +#elif _XTEAM_NUM_THREADS == 128 +#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_2x64 +#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_4x32 +#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_2x64 +#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_4x32 +#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_2x64 +#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_4x32 +#elif _XTEAM_NUM_THREADS == 64 +#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_1x64 +#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_2x32 +#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_1x64 +#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_2x32 +#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_1x64 +#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_2x32 #else -#error Invalid value for _XTEAM_NUM_THREADS. Must be 1024, 512, or 256 +#error Invalid value for _XTEAM_NUM_THREADS. Must be 1024, 512, 256, 128, or 64 #endif // Question to Dhruva, should the limiter include the stride? 
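The overload macros above encode the launch shape in their suffix: for each supported _XTEAM_NUM_THREADS value, the *x64 variant appears to be (threads / 64) waves of 64 lanes and the *x32 variant (threads / 32) waves of 32 lanes, which is how the new 128- and 64-thread cases arrive at _2x64/_4x32 and _1x64/_2x32. A minimal standalone sketch of that mapping (illustrative only; pick_overload_suffix and the main driver are not part of this patch):

  #include <cstdio>

  // Illustrative helper: reproduces the suffix arithmetic used by the
  // preprocessor block above, assuming 64-lane and 32-lane wave variants.
  static void pick_overload_suffix(int num_threads) {
    std::printf("%4d threads -> _%dx64 or _%dx32\n", num_threads,
                num_threads / 64, num_threads / 32);
  }

  int main() {
    const int supported[] = {1024, 512, 256, 128, 64};
    for (int t : supported)
      pick_overload_suffix(t);
    return 0;
  }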
@@ -106,6 +120,7 @@ unsigned int ignore_times = // Format of BIG_JUMP_LOOP depends on if we optimize for 0 index 1 stride #if _XTEAM_NUM_THREADS == 1024 + #ifdef __OPTIMIZE_INDEX0_STRIDE1 #define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ for (int64_t i = k; i < size; i += nteams * 1024) @@ -116,6 +131,7 @@ unsigned int ignore_times = #endif #elif _XTEAM_NUM_THREADS == 512 + #ifdef __OPTIMIZE_INDEX0_STRIDE1 #define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ for (int64_t i = k; i < size; i += nteams * 512) @@ -124,16 +140,41 @@ unsigned int ignore_times = for (int64_t i = ((k * stride) + offset); i < size; \ i += (nteams * 512 * stride)) #endif -#else + +#elif _XTEAM_NUM_THREADS == 256 + #ifdef __OPTIMIZE_INDEX0_STRIDE1 #define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ for (int64_t i = k; i < size; i += nteams * 256) #else #define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ for (int64_t i = ((k * stride) + offset); i < size; \ - i += (nteams * 256 * stride)) + i += (nteams * 256* stride)) #endif -#endif // end if _XTEAM_NUM_THREADS == 1024, elif, else + +#elif _XTEAM_NUM_THREADS == 128 + +#ifdef __OPTIMIZE_INDEX0_STRIDE1 +#define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ + for (int64_t i = k; i < size; i += nteams * 128) +#else +#define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ + for (int64_t i = ((k * stride) + offset); i < size; \ + i += (nteams * 128* stride)) +#endif + +#else + +#ifdef __OPTIMIZE_INDEX0_STRIDE1 +#define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ + for (int64_t i = k; i < size; i += nteams * 64) +#else +#define _BIG_JUMP_LOOP(nteams, size, stride, offset) \ + for (int64_t i = ((k * stride) + offset); i < size; \ + i += (nteams * 64 * stride)) +#endif + +#endif // end if _XTEAM_NUM_THREADS == 1024, elif,elif .. else #endif // if defined(__NVPTX__) && _XTEAM_NUM_THREADS == 1024 else unsigned int test_run_rc = 0; diff --git a/openmp/libomptarget/test/xteamr/test_xteamr.h b/openmp/libomptarget/test/xteamr/test_xteamr.h index 82bb8539edea2..caf780153d388 100644 --- a/openmp/libomptarget/test/xteamr/test_xteamr.h +++ b/openmp/libomptarget/test/xteamr/test_xteamr.h @@ -9,216 +9,332 @@ #define _INLINE_ATTR_ __attribute__((flatten, always_inline)) // Headers for extern xteamr functions defined in libomptarget DeviceRTL -// are defined here in test application because user apps cannot include +// are defined here in the test header file because user apps cannot include // the DeviceRTL Interface.h header file. 
#if defined(__AMDGCN__) || defined(__NVPTX__) extern "C" { #define _RF_LDS volatile __attribute__((address_space(3))) -void _INLINE_ATTR_ __kmpc_xteamr_d_16x64( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_f_16x64( - float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_16x64( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_16x64( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_16x64( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_16x64( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_l_16x64( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_16x64( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_d_32x32( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_f_32x32( - float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_32x32( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_32x32( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_32x32( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_32x32( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_l_32x32( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t 
numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_32x32( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_d_8x64( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ -__kmpc_xteamr_f_8x64(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_8x64( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_8x64( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_8x64( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_8x64( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_l_8x64( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_8x64( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_d_16x32( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_f_16x32( - float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_16x32( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_16x32( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_16x32( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_16x32( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_l_16x32( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - 
void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_16x32( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_d_4x64( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ -__kmpc_xteamr_f_4x64(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_4x64( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_4x64( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_4x64( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_4x64( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_l_4x64( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_4x64( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_d_8x32( - double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ -__kmpc_xteamr_f_8x32(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cd_8x32( - _CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_cf_8x32( - _CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_i_8x32( - int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ui_8x32( - _UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ 
__kmpc_xteamr_l_8x32( - long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, - const uint64_t k, const uint32_t numteams); -void _INLINE_ATTR_ __kmpc_xteamr_ul_8x32( - _UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, - const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_16x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_16x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_16x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_16x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_16x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_16x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_16x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_16x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_8x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_8x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_8x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_8x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_8x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_8x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, 
_RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_8x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_8x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_4x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_4x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_4x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_4x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_4x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_4x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_4x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_4x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_2x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_2x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_2x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_2x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_2x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_2x64 + (_UI v, _UI 
*r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_2x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_2x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_1x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_1x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_1x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_1x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_1x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_1x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_1x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_1x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_32x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_32x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_32x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_32x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_32x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const 
uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_32x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_32x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_32x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_16x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_16x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_16x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_16x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_16x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_16x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_16x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_16x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_8x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_8x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_8x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_8x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_8x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, 
void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_8x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_8x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_8x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_4x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_4x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_4x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_4x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_i_4x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_4x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_4x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_4x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_d_2x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_f_2x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cd_2x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_cf_2x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams); 
+void _INLINE_ATTR_ __kmpc_xteamr_i_2x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ui_2x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_l_2x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams); +void _INLINE_ATTR_ __kmpc_xteamr_ul_2x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams); void __kmpc_rfun_sum_d(double *val, double otherval); void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval); void __kmpc_rfun_sum_f(float *val, float otherval); @@ -270,1174 +386,1177 @@ int __kmpc_get_warp_size(); extern "C" { #undef _RF_LDS #define _RF_LDS -void __kmpc_xteamr_d_16x64(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_16x64(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_16x64(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_16x64(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_16x64(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_16x64(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_16x64(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_16x64(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_d_32x32(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_32x32(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_32x32(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t 
k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_32x32(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_32x32(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_32x32(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_32x32(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_32x32(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_d_8x64(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_8x64(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_8x64(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_8x64(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_8x64(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_8x64(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_8x64(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_8x64(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_d_16x32(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_16x32(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_16x32(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_16x32(_CF v, _CF *r_ptr, 
_CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_16x32(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_16x32(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_16x32(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_16x32(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_d_4x64(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_4x64(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_4x64(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_4x64(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_4x64(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_4x64(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_4x64(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_4x64(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_d_8x32(double v, double *r_ptr, double *tvs, uint32_t *td, - void (*_rf)(double *, double), - void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), - const double iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_f_8x32(float v, float *r_ptr, float *tvs, uint32_t *td, - void (*_rf)(float *, float), - void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), - const float iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cd_8x32(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, - void (*_rf)(_CD *, _CD), - void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), - const _CD iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_cf_8x32(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, - void (*_rf)(_CF *, _CF), - void (*_rf_lds)(_RF_LDS _CF *, 
_RF_LDS _CF *), - const _CF iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_i_8x32(int v, int *r_ptr, int *tvs, uint32_t *td, - void (*_rf)(int *, int), - void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), - const int iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ui_8x32(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, - void (*_rf)(_UI *, _UI), - void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), - const _UI iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_l_8x32(long v, long *r_ptr, long *tvs, uint32_t *td, - void (*_rf)(long *, long), - void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), - const long iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_xteamr_ul_8x32(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, - void (*_rf)(_UL *, _UL), - void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), - const _UL iv, const uint64_t k, - const uint32_t numteams){}; -void __kmpc_rfun_sum_d(double *val, double otherval) {} -void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval) {} -void __kmpc_rfun_sum_f(float *val, float otherval) {} -void __kmpc_rfun_sum_lds_f(_RF_LDS float *val, _RF_LDS float *otherval) {} -void __kmpc_rfun_sum_cd(_CD *val, _CD otherval) {} -void __kmpc_rfun_sum_lds_cd(_RF_LDS _CD *val, _RF_LDS _CD *otherval) {} -void __kmpc_rfun_sum_cf(_CF *val, _CF otherval) {} -void __kmpc_rfun_sum_lds_cf(_RF_LDS _CF *val, _RF_LDS _CF *otherval) {} -void __kmpc_rfun_sum_i(int *val, int otherval) {} -void __kmpc_rfun_sum_lds_i(_RF_LDS int *val, _RF_LDS int *otherval) {} -void __kmpc_rfun_sum_ui(_UI *val, _UI otherval) {} -void __kmpc_rfun_sum_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval) {} -void __kmpc_rfun_sum_l(long *val, long otherval) {} -void __kmpc_rfun_sum_lds_l(_RF_LDS long *val, _RF_LDS long *otherval) {} -void __kmpc_rfun_sum_ul(_UL *val, _UL otherval) {} -void __kmpc_rfun_sum_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval) {} -void __kmpc_rfun_max_d(double *val, double otherval) {} -void __kmpc_rfun_max_lds_d(_RF_LDS double *val, _RF_LDS double *otherval) {} -void __kmpc_rfun_max_f(float *val, float otherval) {} -void __kmpc_rfun_max_lds_f(_RF_LDS float *val, _RF_LDS float *otherval) {} -void __kmpc_rfun_max_i(int *val, int otherval) {} -void __kmpc_rfun_max_lds_i(_RF_LDS int *val, _RF_LDS int *otherval) {} -void __kmpc_rfun_max_ui(_UI *val, _UI otherval) {} -void __kmpc_rfun_max_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval) {} -void __kmpc_rfun_max_l(long *val, long otherval) {} -void __kmpc_rfun_max_lds_l(_RF_LDS long *val, _RF_LDS long *otherval) {} -void __kmpc_rfun_max_ul(_UL *val, _UL otherval) {} -void __kmpc_rfun_max_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval) {} -void __kmpc_rfun_min_d(double *val, double otherval) {} -void __kmpc_rfun_min_lds_d(_RF_LDS double *val, _RF_LDS double *otherval) {} -void __kmpc_rfun_min_f(float *val, float otherval) {} -void __kmpc_rfun_min_lds_f(_RF_LDS float *val, _RF_LDS float *otherval) {} -void __kmpc_rfun_min_i(int *val, int otherval) {} -void __kmpc_rfun_min_lds_i(_RF_LDS int *val, _RF_LDS int *otherval) {} -void __kmpc_rfun_min_ui(_UI *val, _UI otherval) {} -void __kmpc_rfun_min_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval) {} -void __kmpc_rfun_min_l(long *val, long otherval) {} -void __kmpc_rfun_min_lds_l(_RF_LDS long *val, _RF_LDS long *otherval) {} -void __kmpc_rfun_min_ul(_UL *val, _UL otherval) {} -void __kmpc_rfun_min_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval) {} +void __kmpc_xteamr_d_16x64 + (double v, double *r_ptr, double *tvs, 
uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_16x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_16x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_16x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_16x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_16x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_16x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_16x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_8x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_8x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_8x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_8x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_8x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_8x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_8x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_8x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_4x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), 
const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_4x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_4x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_4x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_4x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_4x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_4x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_4x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_2x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_2x64 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_2x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_2x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_2x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_2x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_2x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_2x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_1x64 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_1x64 + (float v, 
float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_1x64 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_1x64 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_1x64 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_1x64 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_1x64 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_1x64 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_32x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_32x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_32x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_32x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_32x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_32x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_32x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_32x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_16x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_16x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS 
float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_16x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_16x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_16x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_16x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_16x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_16x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_8x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_8x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_8x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_8x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_8x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_8x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_8x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_8x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_4x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_4x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void 
__kmpc_xteamr_cd_4x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_4x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_4x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_4x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_4x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_4x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_d_2x32 + (double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double), + void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_f_2x32 + (float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float), + void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cd_2x32 + (_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD), + void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_cf_2x32 + (_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF), + void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_i_2x32 + (int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int), + void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ui_2x32 + (_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI), + void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_l_2x32 + (long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long), + void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_xteamr_ul_2x32 + (_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL), + void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv, + const uint64_t k, const uint32_t numteams){}; +void __kmpc_rfun_sum_d(double *val, double otherval){} +void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval){} +void __kmpc_rfun_sum_f(float *val, float otherval){} +void __kmpc_rfun_sum_lds_f(_RF_LDS float *val, _RF_LDS float *otherval){} +void __kmpc_rfun_sum_cd(_CD *val, _CD otherval){} +void __kmpc_rfun_sum_lds_cd(_RF_LDS _CD *val, _RF_LDS _CD *otherval){} +void __kmpc_rfun_sum_cf(_CF *val, _CF otherval){} +void __kmpc_rfun_sum_lds_cf(_RF_LDS _CF *val, _RF_LDS _CF *otherval){} +void __kmpc_rfun_sum_i(int *val, int otherval){} +void 
__kmpc_rfun_sum_lds_i(_RF_LDS int *val, _RF_LDS int *otherval){} +void __kmpc_rfun_sum_ui(_UI *val, _UI otherval){} +void __kmpc_rfun_sum_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval){} +void __kmpc_rfun_sum_l(long *val, long otherval){} +void __kmpc_rfun_sum_lds_l(_RF_LDS long *val, _RF_LDS long *otherval){} +void __kmpc_rfun_sum_ul(_UL *val, _UL otherval){} +void __kmpc_rfun_sum_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval){} +void __kmpc_rfun_max_d(double *val, double otherval){} +void __kmpc_rfun_max_lds_d(_RF_LDS double *val, _RF_LDS double *otherval){} +void __kmpc_rfun_max_f(float *val, float otherval){} +void __kmpc_rfun_max_lds_f(_RF_LDS float *val, _RF_LDS float *otherval){} +void __kmpc_rfun_max_i(int *val, int otherval){} +void __kmpc_rfun_max_lds_i(_RF_LDS int *val, _RF_LDS int *otherval){} +void __kmpc_rfun_max_ui(_UI *val, _UI otherval){} +void __kmpc_rfun_max_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval){} +void __kmpc_rfun_max_l(long *val, long otherval){} +void __kmpc_rfun_max_lds_l(_RF_LDS long *val, _RF_LDS long *otherval){} +void __kmpc_rfun_max_ul(_UL *val, _UL otherval){} +void __kmpc_rfun_max_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval){} +void __kmpc_rfun_min_d(double *val, double otherval){} +void __kmpc_rfun_min_lds_d(_RF_LDS double *val, _RF_LDS double *otherval){} +void __kmpc_rfun_min_f(float *val, float otherval){} +void __kmpc_rfun_min_lds_f(_RF_LDS float *val, _RF_LDS float *otherval){} +void __kmpc_rfun_min_i(int *val, int otherval){} +void __kmpc_rfun_min_lds_i(_RF_LDS int *val, _RF_LDS int *otherval){} +void __kmpc_rfun_min_ui(_UI *val, _UI otherval){} +void __kmpc_rfun_min_lds_ui(_RF_LDS _UI *val, _RF_LDS _UI *otherval){} +void __kmpc_rfun_min_l(long *val, long otherval){} +void __kmpc_rfun_min_lds_l(_RF_LDS long *val, _RF_LDS long *otherval){} +void __kmpc_rfun_min_ul(_UL *val, _UL otherval){} +void __kmpc_rfun_min_lds_ul(_RF_LDS _UL *val, _RF_LDS _UL *otherval){} #undef _RF_LDS -int __kmpc_get_warp_size() { +int __kmpc_get_warp_size(){ printf("ERROR: executing _kmpc_get_warp_size on host\n"); - return -1; -} + return -1;} } // end extern C -#endif // of definitions for host null functions +#endif // of definitions for host null functions -// These overloaded function definitions are for this test framework +// These overloaded function definitions are for this test framework // (xteamr.cpp) to invoke the extern DexviceRTL helper functions. 
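For orientation, the overloads touched below all follow one dispatch pattern: each _overload_to_extern_<op>_<WxS> overload forwards to the typed __kmpc_xteamr_<t>_<WxS> entry point together with the matching __kmpc_rfun_<op>_<t> / __kmpc_rfun_<op>_lds_<t> pair of reduction functions. A minimal sketch of that pattern, mirroring the double/sum/16x64 case from the definitions that follow (illustrative only, assuming the declarations above are in scope):

void _INLINE_ATTR_ _overload_to_extern_sum_16x64(double val, double *rv,
                                                 double *tvs, uint32_t *td,
                                                 const double iv,
                                                 const uint64_t k,
                                                 const uint32_t numteams) {
  // double + sum: dispatch to the _d entry point with the _d sum reducers.
  __kmpc_xteamr_d_16x64(val, rv, tvs, td, __kmpc_rfun_sum_d,
                        __kmpc_rfun_sum_lds_d, iv, k, numteams);
}

The same shape repeats for every element type (f, cd, cf, i, ui, l, ul), every reduction (sum, max, min), and every warps-per-team x wave-size variant.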
-void _INLINE_ATTR_ _overload_to_extern_sum_16x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x64(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x64(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(_CD val, _CD *rv, _CD *tvs, - uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_16x64(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_16x64(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x64(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x64(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x64(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x64(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_32x32(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_32x32(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(_CD val, _CD *rv, _CD *tvs, - uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_32x32(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_32x32(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_32x32(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ 
_overload_to_extern_sum_32x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_32x32(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_32x32(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_32x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_32x32(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x64(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x64(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(_CD val, _CD *rv, _CD *tvs, - uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_8x64(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_8x64(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x64(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x64(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x64(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_8x64(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x32(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x32(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(_CD val, _CD *rv, _CD *tvs, - 
uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_16x32(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_16x32(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x32(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x32(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x32(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_16x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x32(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_4x64(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_4x64(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(_CD val, _CD *rv, _CD *tvs, - uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_4x64(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_4x64(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_4x64(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_4x64(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_4x64(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_4x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - 
__kmpc_xteamr_ul_4x64(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x32(val, rv, tvs, td, __kmpc_rfun_sum_d, - __kmpc_rfun_sum_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x32(val, rv, tvs, td, __kmpc_rfun_sum_f, - __kmpc_rfun_sum_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(_CD val, _CD *rv, _CD *tvs, - uint32_t *td, const _CD iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cd_8x32(val, rv, tvs, td, __kmpc_rfun_sum_cd, - __kmpc_rfun_sum_lds_cd, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(_CF val, _CF *rv, _CF *tvs, - uint32_t *td, const _CF iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_cf_8x32(val, rv, tvs, td, __kmpc_rfun_sum_cf, - __kmpc_rfun_sum_lds_cf, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x32(val, rv, tvs, td, __kmpc_rfun_sum_i, - __kmpc_rfun_sum_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x32(val, rv, tvs, td, __kmpc_rfun_sum_ui, - __kmpc_rfun_sum_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x32(val, rv, tvs, td, __kmpc_rfun_sum_l, - __kmpc_rfun_sum_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_sum_8x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_8x32(val, rv, tvs, td, __kmpc_rfun_sum_ul, - __kmpc_rfun_sum_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x64(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x64(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x64(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x64(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x64(val, rv, tvs, td, __kmpc_rfun_max_l, - 
__kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x64(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_32x32(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_32x32(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_32x32(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_32x32(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_32x32(val, rv, tvs, td, __kmpc_rfun_max_l, - __kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_32x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_32x32(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x64(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x64(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x64(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x64(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x64(val, rv, tvs, td, __kmpc_rfun_max_l, - __kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_8x64(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ 
_overload_to_extern_max_16x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x32(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x32(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x32(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x32(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x32(val, rv, tvs, td, __kmpc_rfun_max_l, - __kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_16x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x32(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_4x64(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_4x64(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_4x64(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_4x64(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_4x64(val, rv, tvs, td, __kmpc_rfun_max_l, - __kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_4x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_4x64(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x32(val, rv, tvs, td, __kmpc_rfun_max_d, - __kmpc_rfun_max_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(float val, float *rv, - 
float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x32(val, rv, tvs, td, __kmpc_rfun_max_f, - __kmpc_rfun_max_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x32(val, rv, tvs, td, __kmpc_rfun_max_i, - __kmpc_rfun_max_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x32(val, rv, tvs, td, __kmpc_rfun_max_ui, - __kmpc_rfun_max_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x32(val, rv, tvs, td, __kmpc_rfun_max_l, - __kmpc_rfun_max_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_max_8x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_8x32(val, rv, tvs, td, __kmpc_rfun_max_ul, - __kmpc_rfun_max_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x64(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x64(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x64(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x64(val, rv, tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x64(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x64(val, rv, tvs, td, __kmpc_rfun_min_ul, - __kmpc_rfun_min_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_32x32(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_32x32(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, 
- const uint32_t numteams) { - __kmpc_xteamr_i_32x32(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_32x32(val, rv, tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_32x32(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_32x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_32x32(val, rv, tvs, td, __kmpc_rfun_min_ul, - __kmpc_rfun_min_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x64(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x64(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x64(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x64(val, rv, tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x64(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_8x64(val, rv, tvs, td, __kmpc_rfun_min_ul, - __kmpc_rfun_min_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_16x32(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_16x32(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_16x32(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_16x32(val, rv, 
tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_16x32(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_16x32(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_16x32(val, rv, tvs, td, __kmpc_rfun_min_ul, - __kmpc_rfun_min_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_4x64(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_4x64(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_4x64(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_4x64(val, rv, tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_4x64(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_4x64(_UL val, _UL *rv, _UL *tvs, - uint32_t *td, const _UL iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ul_4x64(val, rv, tvs, td, __kmpc_rfun_min_ul, - __kmpc_rfun_min_lds_ul, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x32(double val, double *rv, - double *tvs, uint32_t *td, - const double iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_d_8x32(val, rv, tvs, td, __kmpc_rfun_min_d, - __kmpc_rfun_min_lds_d, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x32(float val, float *rv, - float *tvs, uint32_t *td, - const float iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_f_8x32(val, rv, tvs, td, __kmpc_rfun_min_f, - __kmpc_rfun_min_lds_f, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x32(int val, int *rv, int *tvs, - uint32_t *td, const int iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_i_8x32(val, rv, tvs, td, __kmpc_rfun_min_i, - __kmpc_rfun_min_lds_i, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x32(_UI val, _UI *rv, _UI *tvs, - uint32_t *td, const _UI iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_ui_8x32(val, rv, tvs, td, __kmpc_rfun_min_ui, - __kmpc_rfun_min_lds_ui, iv, k, numteams); -} -void _INLINE_ATTR_ _overload_to_extern_min_8x32(long val, long *rv, long *tvs, - uint32_t *td, const long iv, - const uint64_t k, - const uint32_t numteams) { - __kmpc_xteamr_l_8x32(val, rv, tvs, td, __kmpc_rfun_min_l, - __kmpc_rfun_min_lds_l, iv, k, numteams); 
-}
-void _INLINE_ATTR_ _overload_to_extern_min_8x32(_UL val, _UL *rv, _UL *tvs,
- uint32_t *td, const _UL iv,
- const uint64_t k,
- const uint32_t numteams) {
- __kmpc_xteamr_ul_8x32(val, rv, tvs, td, __kmpc_rfun_min_ul,
- __kmpc_rfun_min_lds_ul, iv, k, numteams);
-}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_d_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_f_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_cd_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_cf_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_i_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_ui_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_l_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_16x64
+ (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_ul_16x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_8x64
+ (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_d_8x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_8x64
+ (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_f_8x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_8x64
+ (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_cd_8x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_8x64
+ (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams)
+ { __kmpc_xteamr_cf_8x64(val, rv, tvs, td,
+ __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);}
+void _INLINE_ATTR_ _overload_to_extern_sum_8x64
+ (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t
k, const uint32_t numteams) + { __kmpc_xteamr_i_8x64(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x64(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x64(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x64(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x64(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void 
_INLINE_ATTR_ _overload_to_extern_sum_2x64 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x64(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_1x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { 
__kmpc_xteamr_ul_1x64(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_32x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_32x32(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ 
_overload_to_extern_sum_16x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_16x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_16x32(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_8x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x32(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { 
__kmpc_xteamr_cd_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_4x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x32(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_d, __kmpc_rfun_sum_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_f, __kmpc_rfun_sum_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (_CD val, _CD *rv, _CD *tvs, uint32_t *td, const _CD iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cd_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_cd, __kmpc_rfun_sum_lds_cd, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (_CF val, _CF *rv, _CF *tvs, uint32_t *td, const _CF iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_cf_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_cf, __kmpc_rfun_sum_lds_cf, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_i, __kmpc_rfun_sum_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_ui, __kmpc_rfun_sum_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_l, __kmpc_rfun_sum_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_sum_2x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x32(val, rv, tvs, td, + __kmpc_rfun_sum_ul, __kmpc_rfun_sum_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (double 
val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_16x64(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_16x64(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_16x64(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_16x64(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_16x64(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_16x64(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_8x64(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_8x64(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_8x64(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x64(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x64(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x64(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x64(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x64(val, 
rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x64(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x64(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x64(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x64(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x64(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x64(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x64(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x64(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x64(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x64(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_1x64(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_1x64(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_1x64(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (_UI val, _UI *rv, _UI 
*tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_1x64(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_1x64(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_1x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_1x64(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_32x32(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_32x32(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_32x32(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_32x32(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_32x32(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_32x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_32x32(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_16x32(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_16x32(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_16x32(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_16x32(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_16x32(val, rv, tvs, td, + 
__kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_16x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_16x32(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_8x32(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_8x32(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_8x32(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x32(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x32(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_8x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x32(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x32(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x32(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x32(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x32(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x32(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_4x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x32(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (double val, double *rv, double 
*tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x32(val, rv, tvs, td, + __kmpc_rfun_max_d, __kmpc_rfun_max_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x32(val, rv, tvs, td, + __kmpc_rfun_max_f, __kmpc_rfun_max_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x32(val, rv, tvs, td, + __kmpc_rfun_max_i, __kmpc_rfun_max_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x32(val, rv, tvs, td, + __kmpc_rfun_max_ui, __kmpc_rfun_max_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x32(val, rv, tvs, td, + __kmpc_rfun_max_l, __kmpc_rfun_max_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_max_2x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x32(val, rv, tvs, td, + __kmpc_rfun_max_ul, __kmpc_rfun_max_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_16x64(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_16x64(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_16x64(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_16x64(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_16x64(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_16x64(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_8x64(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_8x64(val, rv, tvs, td, + 
__kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_8x64(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x64(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x64(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x64(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x64(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x64(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x64(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x64(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x64(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x64(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x64(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x64(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x64(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t 
*td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x64(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x64(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x64(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_1x64(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_1x64(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_1x64(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_1x64(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_1x64(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_1x64 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_1x64(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_32x32(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_32x32(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_32x32(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_32x32(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_32x32(val, rv, tvs, td, + __kmpc_rfun_min_l, 
__kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_32x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_32x32(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_16x32(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_16x32(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_16x32(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_16x32(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_16x32(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_16x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_16x32(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_8x32(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_8x32(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_8x32(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_8x32(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_8x32(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_8x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_8x32(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (double val, double *rv, double *tvs, 
uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_4x32(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_4x32(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_4x32(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_4x32(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_4x32(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_4x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_4x32(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (double val, double *rv, double *tvs, uint32_t *td, const double iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_d_2x32(val, rv, tvs, td, + __kmpc_rfun_min_d, __kmpc_rfun_min_lds_d, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (float val, float *rv, float *tvs, uint32_t *td, const float iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_f_2x32(val, rv, tvs, td, + __kmpc_rfun_min_f, __kmpc_rfun_min_lds_f, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (int val, int *rv, int *tvs, uint32_t *td, const int iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_i_2x32(val, rv, tvs, td, + __kmpc_rfun_min_i, __kmpc_rfun_min_lds_i, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (_UI val, _UI *rv, _UI *tvs, uint32_t *td, const _UI iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ui_2x32(val, rv, tvs, td, + __kmpc_rfun_min_ui, __kmpc_rfun_min_lds_ui, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (long val, long *rv, long *tvs, uint32_t *td, const long iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_l_2x32(val, rv, tvs, td, + __kmpc_rfun_min_l, __kmpc_rfun_min_lds_l, iv, k, numteams);} +void _INLINE_ATTR_ _overload_to_extern_min_2x32 + (_UL val, _UL *rv, _UL *tvs, uint32_t *td, const _UL iv, const uint64_t k, const uint32_t numteams) + { __kmpc_xteamr_ul_2x32(val, rv, tvs, td, + __kmpc_rfun_min_ul, __kmpc_rfun_min_lds_ul, iv, k, numteams);} #undef _CD #undef _CF #undef _UI diff --git a/openmp/libomptarget/test/xteamr/test_xteamr.sh b/openmp/libomptarget/test/xteamr/test_xteamr.sh index 434f55de3ffab..5700389216f71 100755 --- a/openmp/libomptarget/test/xteamr/test_xteamr.sh +++ b/openmp/libomptarget/test/xteamr/test_xteamr.sh @@ -1,20 +1,26 @@ #!/bin/bash -#== overload_insts_1024.h overloaded instatiations of Xteamr fcts -C++ -*-===// +#=============================== test_xteamr.sh -=============================// # # Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -#===----------------------------------------------------------------------===// +#===----------------------------------------------------------------------====// # # test_xteamr.sh: Script to test high performance reduction helper functions # in llvm-project/openmp/libomptarget/DeviceRTL/src/Xteamr.cpp -# It compiles and executes test_xteamr.cpp in 3 configs. -# 1024 device threads, 512 dev threads, and 256 dev threads. +# It compiles and executes test_xteamr.cpp in 5 configs: +# 1024 device threads, 512 dev threads, 256 dev threads, +# 128 device threads, and 64 dev threads. # # See README file in this directory for more information. +# Example usage: +# export LLVM_INSTALL=/usr/lib/aomp +# export OFFLOAD_ARCH=gfx90a +# export NUM_TEAMS=220 +# ./test_xteamr.sh # -#===----------------------------------------------------------------------===// +#===----------------------------------------------------------------------====// LLVM_INSTALL=${LLVM_INSTALL:-$HOME/llvm} [ ! -f $LLVM_INSTALL/bin/clang ] && echo "ERROR: no LLVM install at $LLVM_INSTALL" && exit 1 @@ -49,15 +55,29 @@ echo " COMPILE with --offload-arch=$OFFLOAD_ARCH $as_arg $nt_args" $LLVM_INSTALL/bin/clang++ -O3 -I. $as_arg $nt_args -fopenmp --offload-arch=$OFFLOAD_ARCH test_xteamr.cpp -o $tmpdir/xteamr_256 $cuda_args -lstdc++ -latomic rc3=$? -[ $rc1 == 0 ] && echo "START EXECUTE xteamr_1024" && $tmpdir/xteamr_1024 > $tmpdir/xteamr_1024.out +nt_args="-D_XTEAM_NUM_THREADS=128 -D_XTEAM_NUM_TEAMS=$NUM_TEAMS" +echo " COMPILE with --offload-arch=$OFFLOAD_ARCH $as_arg $nt_args" +$LLVM_INSTALL/bin/clang++ -O3 -I. $as_arg $nt_args -fopenmp --offload-arch=$OFFLOAD_ARCH test_xteamr.cpp -o $tmpdir/xteamr_128 $cuda_args -lstdc++ -latomic rc4=$? -[ $rc2 == 0 ] && echo "START EXECUTE xteamr_512" && $tmpdir/xteamr_512 > $tmpdir/xteamr_512.out + +nt_args="-D_XTEAM_NUM_THREADS=64 -D_XTEAM_NUM_TEAMS=$NUM_TEAMS" +echo " COMPILE with --offload-arch=$OFFLOAD_ARCH $as_arg $nt_args" +$LLVM_INSTALL/bin/clang++ -O3 -I. $as_arg $nt_args -fopenmp --offload-arch=$OFFLOAD_ARCH test_xteamr.cpp -o $tmpdir/xteamr_64 $cuda_args -lstdc++ -latomic rc5=$? -[ $rc3 == 0 ] && echo "START EXECUTE xteamr_256" && $tmpdir/xteamr_256 > $tmpdir/xteamr_256.out + +[ $rc1 == 0 ] && echo "START EXECUTE xteamr_1024" && $tmpdir/xteamr_1024 > $tmpdir/xteamr_1024.out rc6=$? +[ $rc2 == 0 ] && echo "START EXECUTE xteamr_512" && $tmpdir/xteamr_512 > $tmpdir/xteamr_512.out +rc7=$? +[ $rc3 == 0 ] && echo "START EXECUTE xteamr_256" && $tmpdir/xteamr_256 > $tmpdir/xteamr_256.out +rc8=$? +[ $rc4 == 0 ] && echo "START EXECUTE xteamr_128" && $tmpdir/xteamr_128 > $tmpdir/xteamr_128.out +rc9=$? +[ $rc5 == 0 ] && echo "START EXECUTE xteamr_64" && $tmpdir/xteamr_64 > $tmpdir/xteamr_64.out +rc10=$? echo -rc=$(( $rc1 + $rc2 + $rc3 + $rc4 + $rc5 + $rc6 )) +rc=$(( $rc1 + $rc2 + $rc3 + $rc4 + $rc5 + $rc6 + $rc7 + $rc8 + $rc9 + $rc10 )) if [ $rc != 0 ] ; then echo "ERRORS DETECTED!"
else diff --git a/openmp/libomptarget/tools/prep-libomptarget-bc/CMakeLists.txt b/openmp/libomptarget/tools/prep-libomptarget-bc/CMakeLists.txt index d5fe774485be3..06d6d20e4313f 100644 --- a/openmp/libomptarget/tools/prep-libomptarget-bc/CMakeLists.txt +++ b/openmp/libomptarget/tools/prep-libomptarget-bc/CMakeLists.txt @@ -10,5 +10,8 @@ set(LLVM_LINK_COMPONENTS IPO ) add_llvm_tool(prep-libomptarget-bc prep-libomptarget-bc.cpp) +if(NOT DEFINED CMAKE_INSTALL_RPATH) + set_target_properties(prep-libomptarget-bc PROPERTIES INSTALL_RPATH "${LLVM_LIBRARY_DIR}:${LLVM_BINARY_DIR}/lib") +endif() llvm_update_compile_flags(prep-libomptarget-bc) include_directories( ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index ae05e051a0187..b7306e5e4273a 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -511,12 +511,12 @@ extern void omp_display_env(int verbose); # if defined(_OPENMP) && _OPENMP >= 201811 - #pragma omp begin declare variant match(device={kind(host)}) - static inline int omp_is_initial_device(void) { return 1; } - #pragma omp end declare variant #pragma omp begin declare variant match(device={kind(nohost)}) static inline int omp_is_initial_device(void) { return 0; } #pragma omp end declare variant + #pragma omp begin declare variant match(device={kind(host)}) + static inline int omp_is_initial_device(void) { return 1; } + #pragma omp end declare variant # endif /* OpenMP 5.2 */
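For context on the _overload_to_extern_* wrappers added above: each wrapper is a thin inline overload that forwards to a type- and geometry-suffixed extern entry point (__kmpc_xteamr_d_16x64, __kmpc_xteamr_f_8x32, and so on), so compiler-generated reduction code can pick the correct device-runtime function purely through C++ overload resolution on the reduction value type. The following is a minimal, self-contained sketch of that dispatch pattern only; the function names and bodies below are illustrative stand-ins, not the actual runtime signatures from this patch.

// Sketch of the overload-to-extern dispatch pattern (hypothetical names).
#include <cstdint>
#include <cstdio>

// Stand-ins for the type-suffixed device-runtime entry points.
static void xteamr_d_16x64(double v)    { std::printf("double entry point: %f\n", v); }
static void xteamr_f_16x64(float v)     { std::printf("float entry point: %f\n", v); }
static void xteamr_ui_16x64(uint32_t v) { std::printf("unsigned entry point: %u\n", v); }

// One overloaded front-end name; the parameter type alone selects the extern,
// so the caller never has to spell the _d/_f/_ui suffix.
static inline void overload_to_extern_sum_16x64(double v)   { xteamr_d_16x64(v); }
static inline void overload_to_extern_sum_16x64(float v)    { xteamr_f_16x64(v); }
static inline void overload_to_extern_sum_16x64(uint32_t v) { xteamr_ui_16x64(v); }

int main() {
  overload_to_extern_sum_16x64(1.0);          // resolves to the double entry point
  overload_to_extern_sum_16x64(2.0f);         // resolves to the float entry point
  overload_to_extern_sum_16x64(uint32_t{3});  // resolves to the unsigned entry point
  return 0;
}

Marking the wrappers with _INLINE_ATTR_ in the patch presumably lets this overload layer disappear after inlining, leaving a direct call to the suffixed extern in the generated device code.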